This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
Re: PATCH: 5/6 [2nd try]: Add AVX support (i387 changes)
> Date: Sat, 6 Mar 2010 14:22:12 -0800
> From: "H.J. Lu" <hongjiu.lu@intel.com>
>
> Hi,
>
> Here are i387 changes to support AVX. OK to install?
I can't help thinking that the i387_supply_xsave/i387_collect_xsave
functions can be written in a simpler way, but I guess for now this is
acceptable. Hope you don't mind if a I rewrite that logic at some
point though.
> H.J.
> ---
> 2010-03-06 H.J. Lu <hongjiu.lu@intel.com>
>
> * i387-tdep.c: Include "i386-xstate.h".
> (XSAVE_XSTATE_BV_ADDR): New.
> (xsave_avxh_offset): Likewise.
> (XSAVE_AVXH_ADDR): Likewise.
> (i387_supply_xsave): Likewise.
> (i387_collect_xsave): Likewise.
>
> * i387-tdep.h (I387_NUM_YMM_REGS): New.
> (I387_YMM0H_REGNUM): Likewise.
> (I387_YMMENDH_REGNUM): Likewise.
> (i387_supply_xsave): Likewise.
> (i387_collect_xsave): Likewise.
>
> diff --git a/gdb/i387-tdep.c b/gdb/i387-tdep.c
> index 3fb5b56..197af7f 100644
> --- a/gdb/i387-tdep.c
> +++ b/gdb/i387-tdep.c
> @@ -34,6 +34,7 @@
>
> #include "i386-tdep.h"
> #include "i387-tdep.h"
> +#include "i386-xstate.h"
>
> /* Print the floating point number specified by RAW. */
>
> @@ -677,6 +678,518 @@ i387_collect_fxsave (const struct regcache *regcache, int regnum, void *fxsave)
> FXSAVE_MXCSR_ADDR (regs));
> }
>
> +/* `xstate_bv' is at byte offset 512. */
> +#define XSAVE_XSTATE_BV_ADDR(xsave) (xsave + 512)
> +
> +/* At xsave_avxh_offset[REGNUM] you'll find the offset to the location in
> + the upper 128bit of AVX register data structure used by the "xsave"
> + instruction where GDB register REGNUM is stored. */
> +
> +static int xsave_avxh_offset[] =
> +{
> + 576 + 0 * 16, /* Upper 128bit of %ymm0 through ... */
> + 576 + 1 * 16,
> + 576 + 2 * 16,
> + 576 + 3 * 16,
> + 576 + 4 * 16,
> + 576 + 5 * 16,
> + 576 + 6 * 16,
> + 576 + 7 * 16,
> + 576 + 8 * 16,
> + 576 + 9 * 16,
> + 576 + 10 * 16,
> + 576 + 11 * 16,
> + 576 + 12 * 16,
> + 576 + 13 * 16,
> + 576 + 14 * 16,
> + 576 + 15 * 16 /* Upper 128bit of ... %ymm15 (128 bits each). */
> +};
> +
> +#define XSAVE_AVXH_ADDR(tdep, xsave, regnum) \
> + (xsave + xsave_avxh_offset[regnum - I387_YMM0H_REGNUM (tdep)])
> +
> +/* Similar to i387_supply_fxsave, but use XSAVE extended state. */
> +
> +void
> +i387_supply_xsave (struct regcache *regcache, int regnum,
> + const void *xsave)
> +{
> + struct gdbarch_tdep *tdep = gdbarch_tdep (get_regcache_arch (regcache));
> + const gdb_byte *regs = xsave;
> + int i;
> + unsigned int clear_bv;
> + const gdb_byte *p;
> + enum
> + {
> + none = 0x0,
> + x87 = 0x1,
> + sse = 0x2,
> + avxh = 0x4,
> + all = x87 | sse | avxh
> + } regclass;
> +
> + gdb_assert (tdep->st0_regnum >= I386_ST0_REGNUM);
> + gdb_assert (tdep->num_xmm_regs > 0);
> +
> + if (regnum == -1)
> + regclass = all;
> + else if (regnum >= I387_YMM0H_REGNUM (tdep)
> + && regnum < I387_YMMENDH_REGNUM (tdep))
> + regclass = avxh;
> + else if (regnum >= I387_XMM0_REGNUM(tdep)
> + && regnum < I387_MXCSR_REGNUM (tdep))
> + regclass = sse;
> + else if (regnum >= I387_ST0_REGNUM (tdep)
> + && regnum < I387_FCTRL_REGNUM (tdep))
> + regclass = x87;
> + else
> + regclass = none;
> +
> + if (regs != NULL && regclass != none)
> + {
> + /* Get `xstat_bv'. */
> + const gdb_byte *xstate_bv_p = XSAVE_XSTATE_BV_ADDR (regs);
> +
> + /* The supported bits in `xstat_bv' are 1 byte. Clear part in
> + vector registers if its bit in xstat_bv is zero. */
> + clear_bv = (~(*xstate_bv_p)) & tdep->xcr0;
> + }
> + else
> + clear_bv = I386_XSTATE_MAX_MASK;
> +
> + switch (regclass)
> + {
> + case none:
> + break;
> +
> + case avxh:
> + if ((clear_bv & bit_I386_XSTATE_AVX))
> + p = NULL;
> + else
> + p = XSAVE_AVXH_ADDR (tdep, regs, regnum);
> + regcache_raw_supply (regcache, regnum, p);
> + return;
> +
> + case sse:
> + if ((clear_bv & bit_I386_XSTATE_SSE))
> + p = NULL;
> + else
> + p = FXSAVE_ADDR (tdep, regs, regnum);
> + regcache_raw_supply (regcache, regnum, p);
> + return;
> +
> + case x87:
> + if ((clear_bv & bit_I386_XSTATE_X87))
> + p = NULL;
> + else
> + p = FXSAVE_ADDR (tdep, regs, regnum);
> + regcache_raw_supply (regcache, regnum, p);
> + return;
> +
> + case all:
> + /* Hanle the upper YMM registers. */
> + if ((tdep->xcr0 & bit_I386_XSTATE_AVX))
> + {
> + if ((clear_bv & bit_I386_XSTATE_AVX))
> + p = NULL;
> + else
> + p = regs;
> +
> + for (i = I387_YMM0H_REGNUM (tdep);
> + i < I387_YMMENDH_REGNUM (tdep); i++)
> + {
> + if (p != NULL)
> + p = XSAVE_AVXH_ADDR (tdep, regs, i);
> + regcache_raw_supply (regcache, i, p);
> + }
> + }
> +
> + /* Handle the XMM registers. */
> + if ((tdep->xcr0 & bit_I386_XSTATE_SSE))
> + {
> + if ((clear_bv & bit_I386_XSTATE_SSE))
> + p = NULL;
> + else
> + p = regs;
> +
> + for (i = I387_XMM0_REGNUM (tdep);
> + i < I387_MXCSR_REGNUM (tdep); i++)
> + {
> + if (p != NULL)
> + p = FXSAVE_ADDR (tdep, regs, i);
> + regcache_raw_supply (regcache, i, p);
> + }
> + }
> +
> + /* Handle the x87 registers. */
> + if ((tdep->xcr0 & bit_I386_XSTATE_X87))
> + {
> + if ((clear_bv & bit_I386_XSTATE_X87))
> + p = NULL;
> + else
> + p = regs;
> +
> + for (i = I387_ST0_REGNUM (tdep);
> + i < I387_FCTRL_REGNUM (tdep); i++)
> + {
> + if (p != NULL)
> + p = FXSAVE_ADDR (tdep, regs, i);
> + regcache_raw_supply (regcache, i, p);
> + }
> + }
> + break;
> + }
> +
> + /* Only handle x87 control registers. */
> + for (i = I387_FCTRL_REGNUM (tdep); i < I387_XMM0_REGNUM (tdep); i++)
> + if (regnum == -1 || regnum == i)
> + {
> + if (regs == NULL)
> + {
> + regcache_raw_supply (regcache, i, NULL);
> + continue;
> + }
> +
> + /* Most of the FPU control registers occupy only 16 bits in
> + the xsave extended state. Give those a special treatment. */
> + if (i != I387_FIOFF_REGNUM (tdep)
> + && i != I387_FOOFF_REGNUM (tdep))
> + {
> + gdb_byte val[4];
> +
> + memcpy (val, FXSAVE_ADDR (tdep, regs, i), 2);
> + val[2] = val[3] = 0;
> + if (i == I387_FOP_REGNUM (tdep))
> + val[1] &= ((1 << 3) - 1);
> + else if (i== I387_FTAG_REGNUM (tdep))
> + {
> + /* The fxsave area contains a simplified version of
> + the tag word. We have to look at the actual 80-bit
> + FP data to recreate the traditional i387 tag word. */
> +
> + unsigned long ftag = 0;
> + int fpreg;
> + int top;
> +
> + top = ((FXSAVE_ADDR (tdep, regs,
> + I387_FSTAT_REGNUM (tdep)))[1] >> 3);
> + top &= 0x7;
> +
> + for (fpreg = 7; fpreg >= 0; fpreg--)
> + {
> + int tag;
> +
> + if (val[0] & (1 << fpreg))
> + {
> + int regnum = (fpreg + 8 - top) % 8
> + + I387_ST0_REGNUM (tdep);
> + tag = i387_tag (FXSAVE_ADDR (tdep, regs, regnum));
> + }
> + else
> + tag = 3; /* Empty */
> +
> + ftag |= tag << (2 * fpreg);
> + }
> + val[0] = ftag & 0xff;
> + val[1] = (ftag >> 8) & 0xff;
> + }
> + regcache_raw_supply (regcache, i, val);
> + }
> + else
> + regcache_raw_supply (regcache, i, FXSAVE_ADDR (tdep, regs, i));
> + }
> +
> + if (regnum == I387_MXCSR_REGNUM (tdep) || regnum == -1)
> + {
> + p = regs == NULL ? NULL : FXSAVE_MXCSR_ADDR (regs);
> + regcache_raw_supply (regcache, I387_MXCSR_REGNUM (tdep), p);
> + }
> +}
> +
> +/* Similar to i387_collect_fxsave, but use XSAVE extended state. */
> +
> +void
> +i387_collect_xsave (const struct regcache *regcache, int regnum,
> + void *xsave, int gcore)
> +{
> + struct gdbarch_tdep *tdep = gdbarch_tdep (get_regcache_arch (regcache));
> + gdb_byte *regs = xsave;
> + int i;
> + enum
> + {
> + none = 0x0,
> + check = 0x1,
> + x87 = 0x2 | check,
> + sse = 0x4 | check,
> + avxh = 0x8 | check,
> + all = x87 | sse | avxh
> + } regclass;
> +
> + gdb_assert (tdep->st0_regnum >= I386_ST0_REGNUM);
> + gdb_assert (tdep->num_xmm_regs > 0);
> +
> + if (regnum == -1)
> + regclass = all;
> + else if (regnum >= I387_YMM0H_REGNUM (tdep)
> + && regnum < I387_YMMENDH_REGNUM (tdep))
> + regclass = avxh;
> + else if (regnum >= I387_XMM0_REGNUM(tdep)
> + && regnum < I387_MXCSR_REGNUM (tdep))
> + regclass = sse;
> + else if (regnum >= I387_ST0_REGNUM (tdep)
> + && regnum < I387_FCTRL_REGNUM (tdep))
> + regclass = x87;
> + else
> + regclass = none;
> +
> + if (gcore)
> + {
> + /* Update XCR0 and `xstate_bv' with XCR0 for gcore. */
> + if (tdep->xsave_xcr0_offset != -1)
> + memcpy (regs + tdep->xsave_xcr0_offset, &tdep->xcr0, 8);
> + memcpy (XSAVE_XSTATE_BV_ADDR (regs), &tdep->xcr0, 8);
> +
> + switch (regclass)
> + {
> + default:
> + abort ();
> +
> + case all:
> + /* Handle the upper YMM registers. */
> + if ((tdep->xcr0 & bit_I386_XSTATE_AVX))
> + for (i = I387_YMM0H_REGNUM (tdep);
> + i < I387_YMMENDH_REGNUM (tdep); i++)
> + regcache_raw_collect (regcache, i,
> + XSAVE_AVXH_ADDR (tdep, regs, i));
> +
> + /* Handle the XMM registers. */
> + if ((tdep->xcr0 & bit_I386_XSTATE_SSE))
> + for (i = I387_XMM0_REGNUM (tdep);
> + i < I387_MXCSR_REGNUM (tdep); i++)
> + regcache_raw_collect (regcache, i,
> + FXSAVE_ADDR (tdep, regs, i));
> +
> + /* Handle the x87 registers. */
> + if ((tdep->xcr0 & bit_I386_XSTATE_X87))
> + for (i = I387_ST0_REGNUM (tdep);
> + i < I387_FCTRL_REGNUM (tdep); i++)
> + regcache_raw_collect (regcache, i,
> + FXSAVE_ADDR (tdep, regs, i));
> + break;
> +
> + case x87:
> + regcache_raw_collect (regcache, regnum,
> + FXSAVE_ADDR (tdep, regs, regnum));
> + return;
> +
> + case sse:
> + regcache_raw_collect (regcache, regnum,
> + FXSAVE_ADDR (tdep, regs, regnum));
> + return;
> +
> + case avxh:
> + regcache_raw_collect (regcache, regnum,
> + XSAVE_AVXH_ADDR (tdep, regs, regnum));
> + return;
> + }
> + }
> + else
> + {
> + if ((regclass & check))
> + {
> + gdb_byte raw[I386_MAX_REGISTER_SIZE];
> + gdb_byte *xstate_bv_p = XSAVE_XSTATE_BV_ADDR (regs);
> + unsigned int xstate_bv = 0;
> + /* The supported bits in `xstat_bv' are 1 byte. */
> + unsigned int clear_bv = (~(*xstate_bv_p)) & tdep->xcr0;
> + gdb_byte *p;
> +
> + /* Clear register set if its bit in xstat_bv is zero. */
> + if (clear_bv)
> + {
> + if ((clear_bv & bit_I386_XSTATE_AVX))
> + for (i = I387_YMM0H_REGNUM (tdep);
> + i < I387_YMMENDH_REGNUM (tdep); i++)
> + memset (XSAVE_AVXH_ADDR (tdep, regs, i), 0, 16);
> +
> + if ((clear_bv & bit_I386_XSTATE_SSE))
> + for (i = I387_XMM0_REGNUM (tdep);
> + i < I387_MXCSR_REGNUM (tdep); i++)
> + memset (FXSAVE_ADDR (tdep, regs, i), 0, 16);
> +
> + if ((clear_bv & bit_I386_XSTATE_X87))
> + for (i = I387_ST0_REGNUM (tdep);
> + i < I387_FCTRL_REGNUM (tdep); i++)
> + memset (FXSAVE_ADDR (tdep, regs, i), 0, 10);
> + }
> +
> + if (regclass == all)
> + {
> + /* Check if any upper YMM registers are changed. */
> + if ((tdep->xcr0 & bit_I386_XSTATE_AVX))
> + for (i = I387_YMM0H_REGNUM (tdep);
> + i < I387_YMMENDH_REGNUM (tdep); i++)
> + {
> + regcache_raw_collect (regcache, i, raw);
> + p = XSAVE_AVXH_ADDR (tdep, regs, i);
> + if (memcmp (raw, p, 16))
> + {
> + xstate_bv |= bit_I386_XSTATE_AVX;
> + memcpy (p, raw, 16);
> + }
> + }
> +
> + /* Check if any SSE registers are changed. */
> + if ((tdep->xcr0 & bit_I386_XSTATE_SSE))
> + for (i = I387_XMM0_REGNUM (tdep);
> + i < I387_MXCSR_REGNUM (tdep); i++)
> + {
> + regcache_raw_collect (regcache, i, raw);
> + p = FXSAVE_ADDR (tdep, regs, i);
> + if (memcmp (raw, p, 16))
> + {
> + xstate_bv |= bit_I386_XSTATE_SSE;
> + memcpy (p, raw, 16);
> + }
> + }
> +
> + /* Check if any X87 registers are changed. */
> + if ((tdep->xcr0 & bit_I386_XSTATE_X87))
> + for (i = I387_ST0_REGNUM (tdep);
> + i < I387_FCTRL_REGNUM (tdep); i++)
> + {
> + regcache_raw_collect (regcache, i, raw);
> + p = FXSAVE_ADDR (tdep, regs, i);
> + if (memcmp (raw, p, 10))
> + {
> + xstate_bv |= bit_I386_XSTATE_X87;
> + memcpy (p, raw, 10);
> + }
> + }
> + }
> + else
> + {
> + /* Check if REGNUM is changed. */
> + regcache_raw_collect (regcache, regnum, raw);
> +
> + switch (regclass)
> + {
> + default:
> + abort ();
> +
> + case avxh:
> + /* This is an upper YMM register. */
> + p = XSAVE_AVXH_ADDR (tdep, regs, regnum);
> + if (memcmp (raw, p, 16))
> + {
> + xstate_bv |= bit_I386_XSTATE_AVX;
> + memcpy (p, raw, 16);
> + }
> + break;
> +
> + case sse:
> + /* This is an SSE register. */
> + p = FXSAVE_ADDR (tdep, regs, regnum);
> + if (memcmp (raw, p, 16))
> + {
> + xstate_bv |= bit_I386_XSTATE_SSE;
> + memcpy (p, raw, 16);
> + }
> + break;
> +
> + case x87:
> + /* This is an x87 register. */
> + p = FXSAVE_ADDR (tdep, regs, regnum);
> + if (memcmp (raw, p, 10))
> + {
> + xstate_bv |= bit_I386_XSTATE_X87;
> + memcpy (p, raw, 10);
> + }
> + break;
> + }
> + }
> +
> + /* Update the corresponding bits in `xstate_bv' if any SSE/AVX
> + registers are changed. */
> + if (xstate_bv)
> + {
> + /* The supported bits in `xstat_bv' are 1 byte. */
> + *xstate_bv_p |= (gdb_byte) xstate_bv;
> +
> + switch (regclass)
> + {
> + default:
> + abort ();
> +
> + case all:
> + break;
> +
> + case x87:
> + case sse:
> + case avxh:
> + /* Register REGNUM has been updated. Return. */
> + return;
> + }
> + }
> + else
> + {
> + /* Return if REGNUM isn't changed. */
> + if (regclass != all)
> + return;
> + }
> + }
> + }
> +
> + /* Only handle x87 control registers. */
> + for (i = I387_FCTRL_REGNUM (tdep); i < I387_XMM0_REGNUM (tdep); i++)
> + if (regnum == -1 || regnum == i)
> + {
> + /* Most of the FPU control registers occupy only 16 bits in
> + the xsave extended state. Give those a special treatment. */
> + if (i != I387_FIOFF_REGNUM (tdep)
> + && i != I387_FOOFF_REGNUM (tdep))
> + {
> + gdb_byte buf[4];
> +
> + regcache_raw_collect (regcache, i, buf);
> +
> + if (i == I387_FOP_REGNUM (tdep))
> + {
> + /* The opcode occupies only 11 bits. Make sure we
> + don't touch the other bits. */
> + buf[1] &= ((1 << 3) - 1);
> + buf[1] |= ((FXSAVE_ADDR (tdep, regs, i))[1] & ~((1 << 3) - 1));
> + }
> + else if (i == I387_FTAG_REGNUM (tdep))
> + {
> + /* Converting back is much easier. */
> +
> + unsigned short ftag;
> + int fpreg;
> +
> + ftag = (buf[1] << 8) | buf[0];
> + buf[0] = 0;
> + buf[1] = 0;
> +
> + for (fpreg = 7; fpreg >= 0; fpreg--)
> + {
> + int tag = (ftag >> (fpreg * 2)) & 3;
> +
> + if (tag != 3)
> + buf[0] |= (1 << fpreg);
> + }
> + }
> + memcpy (FXSAVE_ADDR (tdep, regs, i), buf, 2);
> + }
> + else
> + regcache_raw_collect (regcache, i, FXSAVE_ADDR (tdep, regs, i));
> + }
> +
> + if (regnum == I387_MXCSR_REGNUM (tdep) || regnum == -1)
> + regcache_raw_collect (regcache, I387_MXCSR_REGNUM (tdep),
> + FXSAVE_MXCSR_ADDR (regs));
> +}
> +
> /* Recreate the FTW (tag word) valid bits from the 80-bit FP data in
> *RAW. */
>
> diff --git a/gdb/i387-tdep.h b/gdb/i387-tdep.h
> index 645eb91..976fa11 100644
> --- a/gdb/i387-tdep.h
> +++ b/gdb/i387-tdep.h
> @@ -33,6 +33,8 @@ struct ui_file;
> #define I387_ST0_REGNUM(tdep) ((tdep)->st0_regnum)
> #define I387_NUM_XMM_REGS(tdep) ((tdep)->num_xmm_regs)
> #define I387_MM0_REGNUM(tdep) ((tdep)->mm0_regnum)
> +#define I387_NUM_YMM_REGS(tdep) ((tdep)->num_ymm_regs)
> +#define I387_YMM0H_REGNUM(tdep) ((tdep)->ymm0h_regnum)
>
> #define I387_FCTRL_REGNUM(tdep) (I387_ST0_REGNUM (tdep) + 8)
> #define I387_FSTAT_REGNUM(tdep) (I387_FCTRL_REGNUM (tdep) + 1)
> @@ -45,6 +47,8 @@ struct ui_file;
> #define I387_XMM0_REGNUM(tdep) (I387_ST0_REGNUM (tdep) + 16)
> #define I387_MXCSR_REGNUM(tdep) \
> (I387_XMM0_REGNUM (tdep) + I387_NUM_XMM_REGS (tdep))
> +#define I387_YMMENDH_REGNUM(tdep) \
> + (I387_YMM0H_REGNUM (tdep) + I387_NUM_YMM_REGS (tdep))
>
> /* Print out the i387 floating point state. */
>
> @@ -99,6 +103,11 @@ extern void i387_collect_fsave (const struct regcache *regcache, int regnum,
> extern void i387_supply_fxsave (struct regcache *regcache, int regnum,
> const void *fxsave);
>
> +/* Similar to i387_supply_fxsave, but use XSAVE extended state. */
> +
> +extern void i387_supply_xsave (struct regcache *regcache, int regnum,
> + const void *xsave);
> +
> /* Fill register REGNUM (if it is a floating-point or SSE register) in
> *FXSAVE with the value from REGCACHE. If REGNUM is -1, do this for
> all registers. This function doesn't touch any of the reserved
> @@ -107,6 +116,11 @@ extern void i387_supply_fxsave (struct regcache *regcache, int regnum,
> extern void i387_collect_fxsave (const struct regcache *regcache, int regnum,
> void *fxsave);
>
> +/* Similar to i387_collect_fxsave, but use XSAVE extended state. */
> +
> +extern void i387_collect_xsave (const struct regcache *regcache,
> + int regnum, void *xsave, int gcore);
> +
> /* Prepare the FPU stack in REGCACHE for a function return. */
>
> extern void i387_return_value (struct gdbarch *gdbarch,
>