[PATCH] x86-64: fix ZMM register state tracking

Metzger, Markus T markus.t.metzger@intel.com
Mon Sep 24 17:19:00 GMT 2018


Hello Jan,

> The three AVX512 state components are entirely independent - one being in its "init
> state" has no implication whatsoever on either of the other two. Fully separate
> X86_XSTATE_ZMM_H and X86_XSTATE_ZMM handling, to prevent upper halves of
> the upper 16 ZMM registers to display as if they were zero (when they aren't) after
> e.g. VZEROALL/VZEROUPPER.
> 
> gdb/
> 2018-09-05  Jan Beulich  <jbeulich@suse.com>
> 
> 	* i387-tdep.c (i387_supply_xsave): Split handling of
> 	X86_XSTATE_ZMM_H and X86_XSTATE_ZMM.
> 	(i387_collect_xsave): Likewise.
> 
> --- a/gdb/i387-tdep.c
> +++ b/gdb/i387-tdep.c
> @@ -923,7 +923,8 @@ i387_supply_xsave (struct regcache *regc
>    enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
>    struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
>    const gdb_byte *regs = (const gdb_byte *) xsave;
> -  int i;
> +  int i, zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
> +			    + std::min (tdep->num_zmm_regs, 16);

It would be nice to add a comment explaining this magic 16 and the min operation.
This is how XSAVE organizes things, but it isn't entirely intuitive.


>    ULONGEST clear_bv;
>    static const gdb_byte zero[I386_MAX_REGISTER_SIZE] = { 0 };
>    enum
> @@ -1002,7 +1003,8 @@ i387_supply_xsave (struct regcache *regc
>        return;
> 
>      case avx512_zmm_h:
> -      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> +      if ((clear_bv & (regnum < zmm_endlo_regnum ? X86_XSTATE_ZMM_H
> +						 : X86_XSTATE_ZMM)))

It would be nice to have a comment explaining that XSAVE stores the lower 16
registers in a different place than the upper 16 registers, and also guards
them with different XCR0 bits.

We hid the different locations behind the XSAVE_AVX512_ZMM_H_ADDR macro,
but there's nothing similar for the guard bits.  Maybe add macros
for the guard check as well?


>  	regcache->raw_supply (regnum, zero);
>        else
>  	regcache->raw_supply (regnum,
> @@ -1080,21 +1082,17 @@ i387_supply_xsave (struct regcache *regc
>  	    }
>  	}
> 
> -      /* Handle the upper ZMM registers.  */
> -      if ((tdep->xcr0 & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> +      /* Handle the upper halves of the low 8/16 ZMM registers.  */
> +      if ((tdep->xcr0 & X86_XSTATE_ZMM_H))
>  	{
> -	  if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> +	  if ((clear_bv & X86_XSTATE_ZMM_H))
>  	    {
> -	      for (i = I387_ZMM0H_REGNUM (tdep);
> -		   i < I387_ZMMENDH_REGNUM (tdep);
> -		   i++)
> +	      for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
>  		regcache->raw_supply (i, zero);
>  	    }
>  	  else
>  	    {
> -	      for (i = I387_ZMM0H_REGNUM (tdep);
> -		   i < I387_ZMMENDH_REGNUM (tdep);
> -		   i++)
> +	      for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
>  		regcache->raw_supply (i,
>  				      XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));
>  	    }
> @@ -1119,11 +1117,13 @@ i387_supply_xsave (struct regcache *regc
>  	    }
>  	}
> 
> -      /* Handle the YMM_AVX512 registers.  */
> +      /* Handle the upper 16 ZMM/YMM/XMM registers (if any).  */
>        if ((tdep->xcr0 & X86_XSTATE_ZMM))
>  	{
>  	  if ((clear_bv & X86_XSTATE_ZMM))
>  	    {
> +	      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
> +		regcache->raw_supply (i, zero);
>  	      for (i = I387_YMM16H_REGNUM (tdep);
>  		   i < I387_YMMH_AVX512_END_REGNUM (tdep);
>  		   i++)
> @@ -1135,6 +1135,9 @@ i387_supply_xsave (struct regcache *regc
>  	    }
>  	  else
>  	    {
> +	      for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
> +		regcache->raw_supply (i,
> +				      XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i));

This covers the upper halves of zmm16 to zmm31.  Looking at the function, the
lower halves appear to be covered by the separate cases avx512_ymmh_avx512 and
avx512_xmmh_avx512.  Maybe reflect this in the comment?  The comment currently
suggests that this code handles the entire upper 16 registers.


>  	      for (i = I387_YMM16H_REGNUM (tdep);
>  		   i < I387_YMMH_AVX512_END_REGNUM (tdep);
>  		   i++)
> @@ -1340,7 +1343,8 @@ i387_collect_xsave (const struct regcach
>    gdb_byte *p, *regs = (gdb_byte *) xsave;
>    gdb_byte raw[I386_MAX_REGISTER_SIZE];
>    ULONGEST initial_xstate_bv, clear_bv, xstate_bv = 0;
> -  unsigned int i;
> +  unsigned int i, zmm_endlo_regnum = I387_ZMM0H_REGNUM (tdep)
> +				     + std::min (tdep->num_zmm_regs, 16);
>    enum
>      {
>        x87_ctrl_or_mxcsr = 0x1,
> @@ -1441,9 +1445,8 @@ i387_collect_xsave (const struct regcach
>  	     i < I387_MPXEND_REGNUM (tdep); i++)
>  	  memset (XSAVE_MPX_ADDR (tdep, regs, i), 0, 8);
> 
> -      if ((clear_bv & (X86_XSTATE_ZMM_H | X86_XSTATE_ZMM)))
> -	for (i = I387_ZMM0H_REGNUM (tdep);
> -	     i < I387_ZMMENDH_REGNUM (tdep); i++)
> +      if ((clear_bv & X86_XSTATE_ZMM_H))
> +	for (i = I387_ZMM0H_REGNUM (tdep); i < zmm_endlo_regnum; i++)
>  	  memset (XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i), 0, 32);
> 
>        if ((clear_bv & X86_XSTATE_K))
> @@ -1453,6 +1456,8 @@ i387_collect_xsave (const struct regcach
> 
>        if ((clear_bv & X86_XSTATE_ZMM))
>  	{
> +	  for (i = zmm_endlo_regnum; i < I387_ZMMENDH_REGNUM (tdep); i++)
> +	    memset (XSAVE_AVX512_ZMM_H_ADDR (tdep, regs, i), 0, 32);
>  	  for (i = I387_YMM16H_REGNUM (tdep);
>  	       i < I387_YMMH_AVX512_END_REGNUM (tdep); i++)
>  	    memset (XSAVE_YMM_AVX512_ADDR (tdep, regs, i), 0, 16);

Looks OK to me.

Regards,
Markus.

Intel Deutschland GmbH
Registered Address: Am Campeon 10-12, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Christin Eisenschmid, Christian Lamprechter
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928



More information about the Gdb-patches mailing list