[PATCH] Handle bit offset and bit size in base types

Tom Tromey tom@tromey.com
Mon Aug 17 02:04:36 GMT 2020


PR symtab/25470 points out that the Zig programming language allows
integers of various bit sizes (including zero), not just sizes that
are a multiple of 8.

This is supported in DWARF by giving a base type both a
DW_AT_byte_size and a DW_AT_bit_size attribute.

This patch adds support for this feature to integer and boolean types.
Other base types are not handled -- for floating-point types, this
didn't seem to make sense, and for character types I didn't see much
need.  (These can be added later if desired.)

I've also added support for DW_AT_data_bit_offset at the same time.  I
don't know whether the Zig compiler requires this, but it is
described in the same section of the DWARF standard and was easy to
add.

A new test case is supplied, using the DWARF assembler.

gdb/ChangeLog
2020-08-16  Tom Tromey  <tom@tromey.com>

	PR symtab/25470:
	* value.c (unpack_long, pack_long, pack_unsigned_long): Handle bit
	offset and bit size.
	* printcmd.c (print_scalar_formatted): Handle zero-length
	integer.  Use bit_size_differs_p.
	* gdbtypes.h (enum type_specific_kind) <TYPE_SPECIFIC_INT>: New
	constant.
	(union type_specific): <int_stuff>: New member.
	(struct type) <bit_size_differs_p, bit_size, bit_offset>: New
	methods.
	* gdbtypes.c (init_integer_type, init_boolean_type): Initialize
	TYPE_SPECIFIC_FIELD.
	(recursive_dump_type, copy_type_recursive): Update.
	* dwarf2/read.c (read_base_type): Handle DW_AT_bit_size and
	DW_AT_data_bit_offset.

gdb/testsuite/ChangeLog
2020-08-16  Tom Tromey  <tom@tromey.com>

	* gdb.dwarf2/intbits.exp: New file.
	* gdb.dwarf2/intbits.c: New file.
---
 gdb/ChangeLog                        |  19 +++
 gdb/dwarf2/read.c                    |  20 +++
 gdb/gdbtypes.c                       |  24 ++++
 gdb/gdbtypes.h                       |  43 +++++-
 gdb/printcmd.c                       |  16 ++-
 gdb/testsuite/ChangeLog              |   5 +
 gdb/testsuite/gdb.dwarf2/intbits.c   |  48 +++++++
 gdb/testsuite/gdb.dwarf2/intbits.exp | 201 +++++++++++++++++++++++++++
 gdb/value.c                          |  37 ++++-
 9 files changed, 406 insertions(+), 7 deletions(-)
 create mode 100644 gdb/testsuite/gdb.dwarf2/intbits.c
 create mode 100644 gdb/testsuite/gdb.dwarf2/intbits.exp

diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c
index 0ac8533263a..873bc240a99 100644
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@@ -18060,6 +18060,26 @@ read_base_type (struct die_info *die, struct dwarf2_cu *cu)
 
   TYPE_ENDIANITY_NOT_DEFAULT (type) = gdbarch_byte_order (arch) != byte_order;
 
+  if (TYPE_SPECIFIC_FIELD (type) == TYPE_SPECIFIC_INT)
+    {
+      attr = dwarf2_attr (die, DW_AT_bit_size, cu);
+      if (attr != nullptr && DW_UNSND (attr) <= 8 * TYPE_LENGTH (type))
+	{
+	  unsigned real_bit_size = DW_UNSND (attr);
+	  attr = dwarf2_attr (die, DW_AT_data_bit_offset, cu);
+	  /* Only use the attributes if they make sense together.  */
+	  if (attr == nullptr
+	      || DW_UNSND (attr) + real_bit_size <= 8 * TYPE_LENGTH (type))
+	    {
+	      TYPE_MAIN_TYPE (type)->type_specific.int_stuff.bit_size
+		= real_bit_size;
+	      if (attr != nullptr)
+		TYPE_MAIN_TYPE (type)->type_specific.int_stuff.bit_offset
+		  = DW_UNSND (attr);
+	    }
+	}
+    }
+
   return set_die_type (die, type, cu);
 }
 
diff --git a/gdb/gdbtypes.c b/gdb/gdbtypes.c
index da1c58c65c1..1764e1c60b4 100644
--- a/gdb/gdbtypes.c
+++ b/gdb/gdbtypes.c
@@ -3189,6 +3189,10 @@ init_integer_type (struct objfile *objfile,
   if (unsigned_p)
     TYPE_UNSIGNED (t) = 1;
 
+  TYPE_SPECIFIC_FIELD (t) = TYPE_SPECIFIC_INT;
+  TYPE_MAIN_TYPE (t)->type_specific.int_stuff.bit_size = bit;
+  TYPE_MAIN_TYPE (t)->type_specific.int_stuff.bit_offset = 0;
+
   return t;
 }
 
@@ -3223,6 +3227,10 @@ init_boolean_type (struct objfile *objfile,
   if (unsigned_p)
     TYPE_UNSIGNED (t) = 1;
 
+  TYPE_SPECIFIC_FIELD (t) = TYPE_SPECIFIC_INT;
+  TYPE_MAIN_TYPE (t)->type_specific.int_stuff.bit_size = bit;
+  TYPE_MAIN_TYPE (t)->type_specific.int_stuff.bit_offset = 0;
+
   return t;
 }
 
@@ -5162,6 +5170,16 @@ recursive_dump_type (struct type *type, int spaces)
 	gdb_print_host_address (TYPE_SELF_TYPE (type), gdb_stdout);
 	puts_filtered ("\n");
 	break;
+
+    case TYPE_SPECIFIC_INT:
+      if (type->bit_size_differs_p ())
+	{
+	  unsigned bit_size = type->bit_size ();
+	  unsigned bit_off = type->bit_offset ();
+	  printfi_filtered (spaces, " bit size = %u, bit offset = %u\n",
+			    bit_size, bit_off);
+	}
+      break;
     }
 
   if (spaces == 0)
@@ -5385,6 +5403,12 @@ copy_type_recursive (struct objfile *objfile,
 			  copy_type_recursive (objfile, TYPE_SELF_TYPE (type),
 					       copied_types));
       break;
+    case TYPE_SPECIFIC_INT:
+      TYPE_SPECIFIC_FIELD (new_type) = TYPE_SPECIFIC_INT;
+      TYPE_MAIN_TYPE (new_type)->type_specific.int_stuff
+	= TYPE_MAIN_TYPE (type)->type_specific.int_stuff;
+      break;
+
     default:
       gdb_assert_not_reached ("bad type_specific_kind");
     }
diff --git a/gdb/gdbtypes.h b/gdb/gdbtypes.h
index 55a6dafb7e2..d779c803cc9 100644
--- a/gdb/gdbtypes.h
+++ b/gdb/gdbtypes.h
@@ -673,7 +673,8 @@ enum type_specific_kind
   TYPE_SPECIFIC_FLOATFORMAT,
   /* Note: This is used by TYPE_CODE_FUNC and TYPE_CODE_METHOD.  */
   TYPE_SPECIFIC_FUNC,
-  TYPE_SPECIFIC_SELF_TYPE
+  TYPE_SPECIFIC_SELF_TYPE,
+  TYPE_SPECIFIC_INT
 };
 
 union type_owner
@@ -838,6 +839,21 @@ union type_specific
      is a member of.  */
 
   struct type *self_type;
+
+  /* * An integer-like scalar type may be stored in just part of its
+     enclosing storage bytes.  This structure describes this
+     situation.  */
+  struct
+  {
+    /* * The bit size of the integer.  This can be 0.  For integers
+       that fill their storage (the ordinary case), this field holds
+       the byte size times 8.  */
+    unsigned short bit_size;
+    /* * The bit offset of the integer.  This is ordinarily 0, and can
+       only be non-zero if the bit size is less than the storage
+       size.  */
+    unsigned short bit_offset;
+  } int_stuff;
 };
 
 /* * Main structure representing a type in GDB.
@@ -1081,6 +1097,31 @@ struct type
   /* * Remove dynamic property of kind KIND from this type, if it exists.  */
   void remove_dyn_prop (dynamic_prop_node_kind kind);
 
+  /* * Return true if this is an integer type whose logical (bit) size
+     differs from its storage size; false otherwise.  Always return
+     false for non-integer (i.e., non-TYPE_SPECIFIC_INT) types.  */
+  bool bit_size_differs_p () const
+  {
+    return (main_type->type_specific_field == TYPE_SPECIFIC_INT
+	    && main_type->type_specific.int_stuff.bit_size != 8 * length);
+  }
+
+  /* * Return the logical (bit) size for this integer type.  Only
+     valid for integer (TYPE_SPECIFIC_INT) types.  */
+  unsigned short bit_size () const
+  {
+    gdb_assert (main_type->type_specific_field == TYPE_SPECIFIC_INT);
+    return main_type->type_specific.int_stuff.bit_size;
+  }
+
+  /* * Return the bit offset for this integer type.  Only valid for
+     integer (TYPE_SPECIFIC_INT) types.  */
+  unsigned short bit_offset () const
+  {
+    gdb_assert (main_type->type_specific_field == TYPE_SPECIFIC_INT);
+    return main_type->type_specific.int_stuff.bit_offset;
+  }
+
   /* * Type that is a pointer to this type.
      NULL if no such pointer-to type is known yet.
      The debugger may add the address of such a type
diff --git a/gdb/printcmd.c b/gdb/printcmd.c
index 309d2cabfff..fede8a08151 100644
--- a/gdb/printcmd.c
+++ b/gdb/printcmd.c
@@ -374,6 +374,15 @@ print_scalar_formatted (const gdb_byte *valaddr, struct type *type,
 	valaddr += TYPE_LENGTH (type) - len;
     }
 
+  /* Allow LEN == 0, and in this case, don't assume that VALADDR is
+     valid.  */
+  const gdb_byte zero = 0;
+  if (len == 0)
+    {
+      len = 1;
+      valaddr = &zero;
+    }
+
   if (size != 0 && (options->format == 'x' || options->format == 't'))
     {
       /* Truncate to fit.  */
@@ -404,8 +413,8 @@ print_scalar_formatted (const gdb_byte *valaddr, struct type *type,
      long, and then printing the long.  PR cli/16242 suggests changing
      this to using C-style hex float format.
 
-     Biased range types must also be unbiased here; the unbiasing is
-     done by unpack_long.  */
+     Biased range types and sub-word scalar types must also be handled
+     here; the value is correctly computed by unpack_long.  */
   gdb::byte_vector converted_bytes;
   /* Some cases below will unpack the value again.  In the biased
      range case, we want to avoid this, so we store the unpacked value
@@ -418,7 +427,8 @@ print_scalar_formatted (const gdb_byte *valaddr, struct type *type,
 	   || options->format == 'z'
 	   || options->format == 'd'
 	   || options->format == 'u'))
-      || (type->code () == TYPE_CODE_RANGE && type->bounds ()->bias != 0))
+      || (type->code () == TYPE_CODE_RANGE && type->bounds ()->bias != 0)
+      || type->bit_size_differs_p ())
     {
       val_long.emplace (unpack_long (type, valaddr));
       converted_bytes.resize (TYPE_LENGTH (type));
diff --git a/gdb/testsuite/gdb.dwarf2/intbits.c b/gdb/testsuite/gdb.dwarf2/intbits.c
new file mode 100644
index 00000000000..72b70d40903
--- /dev/null
+++ b/gdb/testsuite/gdb.dwarf2/intbits.c
@@ -0,0 +1,48 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* A 16 bit slot holding a 7-bit value of -1.  Note that, for all
+   these values, we explicitly set the endian-ness in the DWARF to
+   avoid issues.  */
+unsigned char i16_m1[2] = { 0x7f, 0 };
+
+/* A 16 bit slot holding a 1-bit value of 1 at offset 2.  */
+unsigned char u16_1[2] = { 0x4, 0 };
+
+/* A 32 bit slot holding a 17-bit value of -2.  */
+unsigned char u32_m2[4] = { 0xfe, 0xff, 0x01, 0 };
+
+/* A 32 bit slot holding a 31 bit value of 1.  The high bit should be
+   ignored when reading.  */
+unsigned char u32_1[4] = { 1, 0, 0, 0x80 };
+
+/* A 32 bit slot holding a 31 bit value of 1, offset by 1 bit.  */
+unsigned char u32_1_off[4] = { 2, 0, 0, 0 };
+
+/* A 32 bit slot holding a 30 bit value of 1, offset by 1 bit.
+   Big-endian.  */
+unsigned char be30_1_off[4] = { 0x80, 0, 0, 2 };
+
+/* A 32 bit slot holding a 0 bit value.  We don't use 0 in the array
+   here, to catch any situation where gdb tries to use the memory.  */
+unsigned char u32_0[4] = { 0xff, 0xff, 0xff, 0xff };
+
+int
+main (void)
+{
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.dwarf2/intbits.exp b/gdb/testsuite/gdb.dwarf2/intbits.exp
new file mode 100644
index 00000000000..311801869e9
--- /dev/null
+++ b/gdb/testsuite/gdb.dwarf2/intbits.exp
@@ -0,0 +1,201 @@
+# Copyright 2020 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test support for non-byte-sized integer base types.
+
+load_lib dwarf.exp
+
+# This test can only be run on targets which support DWARF-2 and use gas.
+if {![dwarf2_support]} {
+    return 0
+}
+
+standard_testfile .c .S
+
+set executable ${testfile}
+set asm_file [standard_output_file ${srcfile2}]
+
+if [prepare_for_testing "failed to prepare" ${testfile} ${srcfile}] {
+    return -1
+}
+
+# Create the DWARF.
+Dwarf::assemble ${asm_file} {
+    cu {} {
+	DW_TAG_compile_unit {
+	    {DW_AT_language @DW_LANG_C_plus_plus}
+	} {
+	    declare_labels i7_type u1_type u17_type u31_type \
+		u31_1_type u32_0_type u0_0_type be30_1_type
+
+	    i7_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_signed}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "i7"}
+		{DW_AT_byte_size 2 DW_FORM_udata}
+		{DW_AT_bit_size 7 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_i16_m1"}
+		{DW_AT_type :${i7_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "i16_m1"]}
+		    SPECIAL_expr}
+	    }
+
+	    u1_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_boolean}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u1"}
+		{DW_AT_byte_size 2 DW_FORM_udata}
+		{DW_AT_bit_size 1 DW_FORM_udata}
+		{DW_AT_data_bit_offset 2 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u16_1"}
+		{DW_AT_type :${u1_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u16_1"]}
+		    SPECIAL_expr}
+	    }
+
+	    u17_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_signed}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u17"}
+		{DW_AT_byte_size 4 DW_FORM_udata}
+		{DW_AT_bit_size 17 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u32_m2"}
+		{DW_AT_type :${u17_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u32_m2"]}
+		    SPECIAL_expr}
+	    }
+
+	    u31_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_unsigned}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u31"}
+		{DW_AT_byte_size 4 DW_FORM_udata}
+		{DW_AT_bit_size 31 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u32_1"}
+		{DW_AT_type :${u31_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u32_1"]}
+		    SPECIAL_expr}
+	    }
+
+	    u31_1_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_unsigned}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u31_1"}
+		{DW_AT_byte_size 4 DW_FORM_udata}
+		{DW_AT_bit_size 31 DW_FORM_udata}
+		{DW_AT_data_bit_offset 1 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u32_1_off"}
+		{DW_AT_type :${u31_1_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u32_1_off"]}
+		    SPECIAL_expr}
+	    }
+
+	    be30_1_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_unsigned}
+		{DW_AT_endianity @DW_END_big}
+		{DW_AT_name "be30_1"}
+		{DW_AT_byte_size 4 DW_FORM_udata}
+		{DW_AT_bit_size 30 DW_FORM_udata}
+		{DW_AT_data_bit_offset 1 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_be30_1_off"}
+		{DW_AT_type :${be30_1_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "be30_1_off"]}
+		    SPECIAL_expr}
+	    }
+
+	    u32_0_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_unsigned}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u32_0"}
+		{DW_AT_byte_size 4 DW_FORM_udata}
+		{DW_AT_bit_size 0 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u32_0"}
+		{DW_AT_type :${u32_0_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u32_0"]}
+		    SPECIAL_expr}
+	    }
+
+	    u0_0_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_unsigned}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u0_0"}
+		{DW_AT_byte_size 0 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u0_0"}
+		{DW_AT_type :${u0_0_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u32_0"]}
+		    SPECIAL_expr}
+	    }
+	}
+    }
+}
+
+if {[prepare_for_testing "failed to prepare" ${executable} \
+	 [list ${asm_file} ${srcfile}] {}]} {
+    return -1
+}
+
+if {![runto_main]} {
+    return -1
+}
+
+gdb_test "print v_i16_m1" "= -1"
+gdb_test "print v_u16_1" "= true"
+gdb_test "print v_u32_m2" "= -2"
+gdb_test "print v_u32_1" "= 1"
+gdb_test "print v_u32_0" "= 0"
+gdb_test "print v_u0_0" "= 0"
+
+gdb_test "print v_i16_m1 = 7" "= 7"
+gdb_test "print v_i16_m1" "= 7" "print v_i16_m1 after assignment"
+
+gdb_test "print v_u32_1_off" "= 1"
+gdb_test "print v_u32_1_off = 7" " = 7"
+gdb_test "x/4xb &v_u32_1_off" ":\t0x0e\t0x00\t0x00\t0x00"
+
+gdb_test "print v_be30_1_off" "= 1"
+gdb_test "print v_be30_1_off = 7" " = 7"
+gdb_test "x/4xb &v_be30_1_off" ":\t0x00\t0x00\t0x00\t0x0e"
diff --git a/gdb/value.c b/gdb/value.c
index a6e21309f85..1819e50d639 100644
--- a/gdb/value.c
+++ b/gdb/value.c
@@ -2776,10 +2776,27 @@ unpack_long (struct type *type, const gdb_byte *valaddr)
     case TYPE_CODE_MEMBERPTR:
       {
 	LONGEST result;
-	if (nosign)
-	  result = extract_unsigned_integer (valaddr, len, byte_order);
+
+	if (type->bit_size_differs_p ())
+	  {
+	    unsigned bit_off = type->bit_offset ();
+	    unsigned bit_size = type->bit_size ();
+	    if (bit_size == 0)
+	      {
+		/* unpack_bits_as_long doesn't handle this case the
+		   way we'd like, so handle it here.  */
+		result = 0;
+	      }
+	    else
+	      result = unpack_bits_as_long (type, valaddr, bit_off, bit_size);
+	  }
 	else
-	  result = extract_signed_integer (valaddr, len, byte_order);
+	  {
+	    if (nosign)
+	      result = extract_unsigned_integer (valaddr, len, byte_order);
+	    else
+	      result = extract_signed_integer (valaddr, len, byte_order);
+	  }
 	if (code == TYPE_CODE_RANGE)
 	  result += type->bounds ()->bias;
 	return result;
@@ -3339,6 +3356,13 @@ pack_long (gdb_byte *buf, struct type *type, LONGEST num)
     case TYPE_CODE_FLAGS:
     case TYPE_CODE_BOOL:
     case TYPE_CODE_MEMBERPTR:
+      if (type->bit_size_differs_p ())
+	{
+	  unsigned bit_off = type->bit_offset ();
+	  unsigned bit_size = type->bit_size ();
+	  num &= ((ULONGEST) 1 << bit_size) - 1;
+	  num <<= bit_off;
+	}
       store_signed_integer (buf, len, byte_order, num);
       break;
 
@@ -3381,6 +3405,13 @@ pack_unsigned_long (gdb_byte *buf, struct type *type, ULONGEST num)
     case TYPE_CODE_BOOL:
     case TYPE_CODE_RANGE:
     case TYPE_CODE_MEMBERPTR:
+      if (type->bit_size_differs_p ())
+	{
+	  unsigned bit_off = type->bit_offset ();
+	  unsigned bit_size = type->bit_size ();
+	  num &= ((ULONGEST) 1 << bit_size) - 1;
+	  num <<= bit_off;
+	}
       store_unsigned_integer (buf, len, byte_order, num);
       break;
 
-- 
2.17.2



More information about the Gdb-patches mailing list