PATCH: 3/6 [2nd try]: Add AVX support (i386 changes)

Sat Mar 27 15:48:00 GMT 2010

> Date: Sat, 6 Mar 2010 14:20:37 -0800
> From: "H.J. Lu" <hongjiu.lu@intel.com>
> 
> Hi,
> 
> Here are i386 changes to support AVX. OK to install?

OK, here's a review of the remainder of this part of the diff.  I'll
wait with reviewing the amd64 bits until we've got the i386 part
right, since a lot of what I'll say about i386 will also apply to
amd64.  OK?

> diff --git a/gdb/i386-linux-tdep.c b/gdb/i386-linux-tdep.c
> index b23c109..66ecf84 100644
> --- a/gdb/i386-linux-tdep.c
> +++ b/gdb/i386-linux-tdep.c
> @@ -23,6 +23,7 @@
>  #include "frame.h"
>  #include "value.h"
>  #include "regcache.h"
> +#include "regset.h"
>  #include "inferior.h"
>  #include "osabi.h"
>  #include "reggroups.h"
> @@ -36,9 +37,11 @@
>  #include "solib-svr4.h"
>  #include "symtab.h"
>  #include "arch-utils.h"
> -#include "regset.h"
>  #include "xml-syscall.h"
>  
> +#include "i387-tdep.h"
> +#include "i386-xstate.h"
> +
>  /* The syscall's XML filename for i386.  */
>  #define XML_SYSCALL_FILENAME_I386 "syscalls/i386-linux.xml"
>  
> @@ -47,13 +50,15 @@
>  #include <stdint.h>
>  
>  #include "features/i386/i386-linux.c"
> +#include "features/i386/i386-avx-linux.c"
>  
>  /* Supported register note sections.  */
> -static struct core_regset_section i386_linux_regset_sections[] =
> +struct core_regset_section i386_linux_regset_sections[] =

Why do you make this non-static?

>  {
>    { ".reg", 144, "general-purpose" },
>    { ".reg2", 108, "floating-point" },
>    { ".reg-xfp", 512, "extended floating-point" },
> +  { ".reg-xstate", 0, "XSAVE extended state" },
>    { NULL, 0 }
>  };
> @@ -560,6 +566,66 @@ static int i386_linux_sc_reg_offset[] =
>    0 * 4				/* %gs */
>  };
>  
> +/* Update XSAVE extended state register note section.  */
> +
> +void
> +i386_linux_update_xstateregset
> +  (struct core_regset_section *regset_sections, unsigned int xstate_size)
> +{
> +  int i;
> +
> +  /* Update the XSAVE extended state register note section for "gcore".
> +     Disable it if its size is 0.  */
> +  for (i = 0; regset_sections[i].sect_name != NULL; i++)
> +    if (strcmp (regset_sections[i].sect_name, ".reg-xstate") == 0)
> +      {
> +	if (xstate_size)
> +	  regset_sections[i].size = xstate_size;
> +	else
> +	  regset_sections[i].sect_name = NULL;
> +	break;
> +      }
> +}

What will happen if you have a single GDB connected to two different
remote targets, one with AVX support and one without?

> +/* Get XSAVE extended state xcr0 from core dump.  */
> +
> +unsigned long long
> +i386_linux_core_read_xcr0 (struct gdbarch *gdbarch,
> +			   struct target_ops *target, bfd *abfd)

If you follow my advice about using uint64_t for xr0, the return value
will have to be adjusted.

> +{
> +  asection *xstate = bfd_get_section_by_name (abfd, ".reg-xstate");
> +  unsigned long long xcr0;
> +
> +  if (xstate)
> +    {
> +      size_t size = bfd_section_size (abfd, xstate);
> +
> +      gdb_assert (size >= I386_XSTATE_SSE_SIZE);

Isn't a gdb_assert() here a bit harsh?  What happens if you simply return 0?

> +      /* Check extended state size.  */
> +      if (size < I386_XSTATE_AVX_SIZE)
> +	xcr0 = I386_XSTATE_SSE_MASK;
> +      else
> +	{
> +	  char contents[8];
> +
> +	  if (! bfd_get_section_contents (abfd, xstate, contents,
> +					  (file_ptr) I386_LINUX_XSAVE_XCR0_OFFSET,
> +					  8))

Is that cast really necessary?

>  /* Get Linux/x86 target description from core dump.  */
>  
>  static const struct target_desc *
> @@ -568,12 +634,17 @@ i386_linux_core_read_description (struct gdbarch *gdbarch,
>  				  bfd *abfd)
>  {
>    asection *section = bfd_get_section_by_name (abfd, ".reg2");
> +  unsigned long long xcr0;
>  
>    if (section == NULL)
>      return NULL;
>  
>    /* Linux/i386.  */
> -  return tdesc_i386_linux;
> +  xcr0 = i386_linux_core_read_xcr0 (gdbarch, target, abfd);
> +  if ((xcr0 & I386_XSTATE_AVX_MASK) == I386_XSTATE_AVX_MASK)
> +    return tdesc_i386_avx_linux;
> +  else
> +    return tdesc_i386_linux;
>  }
>  
>  static void
> @@ -623,6 +694,8 @@ i386_linux_init_abi (struct gdbarch_info info, struct gdbarch *gdbarch)
>    tdep->sc_reg_offset = i386_linux_sc_reg_offset;
>    tdep->sc_num_regs = ARRAY_SIZE (i386_linux_sc_reg_offset);
>  
> +  tdep->xsave_xcr0_offset = I386_LINUX_XSAVE_XCR0_OFFSET;
> +
>    set_gdbarch_process_record (gdbarch, i386_process_record);
>    set_gdbarch_process_record_signal (gdbarch, i386_linux_record_signal);
>  
> @@ -840,4 +913,5 @@ _initialize_i386_linux_tdep (void)
>  
>    /* Initialize the Linux target description  */
>    initialize_tdesc_i386_linux ();
> +  initialize_tdesc_i386_avx_linux ();
>  }
> diff --git a/gdb/i386-linux-tdep.h b/gdb/i386-linux-tdep.h
> index 11f7295..8881fea 100644
> --- a/gdb/i386-linux-tdep.h
> +++ b/gdb/i386-linux-tdep.h
> @@ -30,12 +30,45 @@
>  /* Register number for the "orig_eax" pseudo-register.  If this
>     pseudo-register contains a value >= 0 it is interpreted as the
>     system call number that the kernel is supposed to restart.  */
> -#define I386_LINUX_ORIG_EAX_REGNUM I386_SSE_NUM_REGS
> +#define I386_LINUX_ORIG_EAX_REGNUM I386_AVX_NUM_REGS
>  
>  /* Total number of registers for GNU/Linux.  */
>  #define I386_LINUX_NUM_REGS (I386_LINUX_ORIG_EAX_REGNUM + 1)
>  
> +/* Get XSAVE extended state xcr0 from core dump.  */
> +extern unsigned long long i386_linux_core_read_xcr0
> +  (struct gdbarch *gdbarch, struct target_ops *target, bfd *abfd);
> +
>  /* Linux target description.  */
>  extern struct target_desc *tdesc_i386_linux;
> +extern struct target_desc *tdesc_i386_avx_linux;
> +
> +/* Supported register note sections.  */
> +extern struct core_regset_section i386_linux_regset_sections[];
> +
> +/* Update XSAVE extended state register note section.  */
> +extern void i386_linux_update_xstateregset
> +  (struct core_regset_section *regset_sections, unsigned int xstate_size);
> +
> +/* Format of XSAVE extended state is:
> + 	struct
> +	{
> +	  fxsave_bytes[0..463]
> +	  sw_usable_bytes[464..511]
> +	  xstate_hdr_bytes[512..575]
> +	  avx_bytes[576..831]
> +	  future_state etc
> +	};
> +
> +  Same memory layout will be used for the coredump NT_X86_XSTATE
> +  representing the XSAVE extended state registers.
> +
> +  The first 8 bytes of the sw_usable_bytes[464..467] is set to OS enabled
> +  enabled state mask,  which is same as the 64bit mask returned by the
> +  xgetbv's XCR0). We can use this mask as well as the mask saved in the
> +  xstate_hdr bytes to interpret what states the processor/OS supports and
> +  what state is in, used/initialized conditions, for the particular
> +  process/thread.  */

Can you ask a native english speaker to look at this comment? 

> diff --git a/gdb/i386-tdep.c b/gdb/i386-tdep.c
> index 05afa56..8ced34a 100644
> --- a/gdb/i386-tdep.c
> +++ b/gdb/i386-tdep.c
> @@ -2183,6 +2241,59 @@ i387_ext_type (struct gdbarch *gdbarch)
>    return tdep->i387_ext_type;
>  }
>  
> +/* Construct vector type for pseudo XMM registers.  We can't use
> +   tdesc_find_type since XMM isn't described in target description.  */

I'm confused here.  If you have a non-AVX target, why do you need a 256-bit vector type?

> +static struct type *
> +i386_ymm_type (struct gdbarch *gdbarch)
> +{
> +  struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
> +
> +  if (!tdep->i386_ymm_type)
> +    {
> +      const struct builtin_type *bt = builtin_type (gdbarch);
> +
> +      /* The type we're building is this: */
> +#if 0
> +      union __gdb_builtin_type_vec256i
> +      {
> +        int128_t uint128[2];
> +        int64_t v2_int64[4];
> +        int32_t v4_int32[8];
> +        int16_t v8_int16[16];
> +        int8_t v16_int8[32];
> +        double v2_double[4];
> +        float v4_float[8];
> +      };
> +#endif
> +
> +      struct type *t;
> +
> +      t = arch_composite_type (gdbarch,
> +			       "__gdb_builtin_type_vec256i", TYPE_CODE_UNION);
> +      append_composite_type_field (t, "v8_float",
> +				   init_vector_type (bt->builtin_float, 8));
> +      append_composite_type_field (t, "v4_double",
> +				   init_vector_type (bt->builtin_double, 4));
> +      append_composite_type_field (t, "v32_int8",
> +				   init_vector_type (bt->builtin_int8, 32));
> +      append_composite_type_field (t, "v16_int16",
> +				   init_vector_type (bt->builtin_int16, 16));
> +      append_composite_type_field (t, "v8_int32",
> +				   init_vector_type (bt->builtin_int32, 8));
> +      append_composite_type_field (t, "v4_int64",
> +				   init_vector_type (bt->builtin_int64, 4));
> +      append_composite_type_field (t, "v2_int128",
> +				   init_vector_type (bt->builtin_int128, 2));
> +
> +      TYPE_VECTOR (t) = 1;
> +      TYPE_NAME (t) = "builtin_type_vec128i";
> +      tdep->i386_ymm_type = t;
> +    }
> +
> +  return tdep->i386_ymm_type;
> +}
> +
>  /* Construct vector type for MMX registers.  */
>  static struct type *
>  i386_mmx_type (struct gdbarch *gdbarch)
> @@ -2233,6 +2344,8 @@ i386_pseudo_register_type (struct gdbarch *gdbarch, int regnum)
>  {
>    if (i386_mmx_regnum_p (gdbarch, regnum))
>      return i386_mmx_type (gdbarch);
> +  else if (i386_ymm_regnum_p (gdbarch, regnum))
> +    return i386_ymm_type (gdbarch);
>    else
>      {
>        const struct builtin_type *bt = builtin_type (gdbarch);
> @@ -2284,7 +2397,22 @@ i386_pseudo_register_read (struct gdbarch *gdbarch, struct regcache *regcache,
>      {
>        struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
>  
> -      if (i386_word_regnum_p (gdbarch, regnum))
> +      if (i386_ymm_regnum_p (gdbarch, regnum))
> +	{
> +	  regnum -= tdep->ymm0_regnum;
> +
> +	  /* Extract (always little endian).  Read lower 16byte. */
> +	  regcache_raw_read (regcache,
> +			     I387_XMM0_REGNUM (tdep) + regnum,
> +			     raw_buf);
> +	  memcpy (buf, raw_buf, 16);
> +	  /* Read upper 16byte.  */
> +	  regcache_raw_read (regcache,
> +			     tdep->ymm0h_regnum + regnum,
> +			     raw_buf);
> +	  memcpy (buf + 16, raw_buf, 16);
> +	}
> +      else if (i386_word_regnum_p (gdbarch, regnum))
>  	{
>  	  int gpnum = regnum - tdep->ax_regnum;
>  
> @@ -2333,7 +2461,20 @@ i386_pseudo_register_write (struct gdbarch *gdbarch, struct regcache *regcache,
>      {
>        struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
>  
> -      if (i386_word_regnum_p (gdbarch, regnum))
> +      if (i386_ymm_regnum_p (gdbarch, regnum))
> +	{
> +	  regnum -= tdep->ymm0_regnum;
> +
> +	  /* ... Write lower 16byte.  */
> +	  regcache_raw_write (regcache,
> +			     I387_XMM0_REGNUM (tdep) + regnum,
> +			     buf);
> +	  /* ... Write upper 16byte.  */
> +	  regcache_raw_write (regcache,
> +			     tdep->ymm0h_regnum + regnum,
> +			     buf + 16);

Culd you change the comments here to say 128-bit instead of 16byte?

> @@ -5649,7 +5836,8 @@ i386_validate_tdesc_p (struct gdbarch_tdep *tdep,
>  		       struct tdesc_arch_data *tdesc_data)
>  {
>    const struct target_desc *tdesc = tdep->tdesc;
> -  const struct tdesc_feature *feature_core, *feature_vector;
> +  const struct tdesc_feature *feature_core;
> +  const struct tdesc_feature *feature_sse, *feature_avx;
>    int i, num_regs, valid_p;
>  
>    if (! tdesc_has_registers (tdesc))
> @@ -5659,13 +5847,37 @@ i386_validate_tdesc_p (struct gdbarch_tdep *tdep,
>    feature_core = tdesc_find_feature (tdesc, "org.gnu.gdb.i386.core");
>  
>    /* Get SSE registers.  */
> -  feature_vector = tdesc_find_feature (tdesc, "org.gnu.gdb.i386.sse");
> +  feature_sse = tdesc_find_feature (tdesc, "org.gnu.gdb.i386.sse");
>  
> -  if (feature_core == NULL || feature_vector == NULL)
> +  if (feature_core == NULL || feature_sse == NULL)
>      return 0;
>  
> +  /* Try AVX registers.  */
> +  feature_avx = tdesc_find_feature (tdesc, "org.gnu.gdb.i386.avx");
> +
>    valid_p = 1;
>  
> +  /* The XCR0 bits.  */
> +  if (feature_avx)
> +    {
> +      tdep->xcr0 = I386_XSTATE_AVX_MASK;
> +
> +      /* It may be set by ABI-specific.  */

Sorry, but does comment makes no sense to me.

> @@ -5854,9 +6071,13 @@ i386_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
>    set_tdesc_pseudo_register_type (gdbarch, i386_pseudo_register_type);
>    set_tdesc_pseudo_register_name (gdbarch, i386_pseudo_register_name);
>  
> -  /* The default ABI includes general-purpose registers, 
> -     floating-point registers, and the SSE registers.  */
> -  set_gdbarch_num_regs (gdbarch, I386_SSE_NUM_REGS);
> +  /* Override the normal target description method to make the AVX
> +     upper halves anonymous.  */
> +  set_gdbarch_register_name (gdbarch, i386_register_name);
> +
> +  /* The default ABI includes general-purpose registers, floating-point
> +     registers, the SSE registers and the upper AVX registers.  */
> +  set_gdbarch_num_regs (gdbarch, I386_AVX_NUM_REGS);

Isn't it better to leave the AVX registers out of the default target,
and only provide them if we're talking to a target (native or remote)
that indicates it supports them?

> @@ -5940,6 +6177,9 @@ i386_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
>    set_gdbarch_fast_tracepoint_valid_at (gdbarch,
>  					i386_fast_tracepoint_valid_at);
>  
> +  /* Tell remote stub that we support XML target description.  */
> +  set_gdbarch_qsupported (gdbarch, "x86=xml");

> @@ -146,9 +156,24 @@ struct gdbarch_tdep
>    /* Number of SSE registers.  */
>    int num_xmm_regs;
>  
> +  /* Bits of the extended control register 0 (the XFEATURE_ENABLED_MASK
> +     register), excluding the x87 bit, which are supported by this gdb.
> +   */
> +  unsigned long long xcr0;

GDB should be capitalized.