[PATCH 08/10] Implement C++14 numeric separators

Tom Tromey tom@tromey.com
Sun Apr 21 17:00:08 GMT 2024


C++14 allows the use of the apostrophe as a numeric separator; that
is, "23000" and "23'000" represent the same number.  This patch
implements this for gdb's C++ parser and the C++ name canonicalizer.

I did this unconditionally for all C variants because I think it's
unambiguous.

For the name canonicalizer, there's at least one compiler that can
emit constants with this form, see bug 30845.

Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=23457
Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=30845
---
 gdb/c-exp.y                   | 28 +++++++++++++++++++++++-----
 gdb/cp-name-parser.y          | 31 ++++++++++++++++++++++++++-----
 gdb/testsuite/gdb.cp/misc.exp |  4 ++++
 3 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/gdb/c-exp.y b/gdb/c-exp.y
index 663c30f9517..69cea523abd 100644
--- a/gdb/c-exp.y
+++ b/gdb/c-exp.y
@@ -2755,6 +2755,10 @@ lex_one_token (struct parser_state *par_state, bool *is_quoted_name)
 	    hex = 0;
 	  }
 
+	/* If the token includes the C++14 digits separator, we make a
+	   copy so that we don't have to handle the separator in
+	   parse_number.  */
+	std::optional<std::string> no_tick;
 	for (;; ++p)
 	  {
 	    /* This test includes !hex because 'e' is a valid hex digit
@@ -2771,18 +2775,32 @@ lex_one_token (struct parser_state *par_state, bool *is_quoted_name)
 	    else if (((got_e && (p[-1] == 'e' || p[-1] == 'E'))
 		      || (got_p && (p[-1] == 'p' || p[-1] == 'P')))
 		     && (*p == '-' || *p == '+'))
-	      /* This is the sign of the exponent, not the end of the
-		 number.  */
-	      continue;
+	      {
+		/* This is the sign of the exponent, not the end of
+		   the number.  */
+	      }
+	    else if (*p == '\'')
+	      {
+		if (!no_tick.has_value ())
+		  no_tick.emplace (tokstart, p);
+		continue;
+	      }
 	    /* We will take any letters or digits.  parse_number will
 	       complain if past the radix, or if L or U are not final.  */
 	    else if ((*p < '0' || *p > '9')
 		     && ((*p < 'a' || *p > 'z')
 				  && (*p < 'A' || *p > 'Z')))
 	      break;
+	    if (no_tick.has_value ())
+	      no_tick->push_back (*p);
 	  }
-	toktype = parse_number (par_state, tokstart, p - tokstart,
-				got_dot | got_e | got_p, &yylval);
+	if (no_tick.has_value ())
+	  toktype = parse_number (par_state, no_tick->c_str (),
+				  no_tick->length (),
+				  got_dot | got_e | got_p, &yylval);
+	else
+	  toktype = parse_number (par_state, tokstart, p - tokstart,
+				  got_dot | got_e | got_p, &yylval);
 	if (toktype == ERROR)
 	  {
 	    char *err_copy = (char *) alloca (p - tokstart + 1);
diff --git a/gdb/cp-name-parser.y b/gdb/cp-name-parser.y
index de08f9c0728..3c5dea2de1c 100644
--- a/gdb/cp-name-parser.y
+++ b/gdb/cp-name-parser.y
@@ -1686,6 +1686,10 @@ yylex (YYSTYPE *lvalp, cpname_state *state)
 	    hex = 0;
 	  }
 
+	/* If the token includes the C++14 digits separator, we make a
+	   copy so that we don't have to handle the separator in
+	   parse_number.  */
+	std::optional<std::string> no_tick;
 	for (;; ++p)
 	  {
 	    /* This test includes !hex because 'e' is a valid hex digit
@@ -1703,16 +1707,31 @@ yylex (YYSTYPE *lvalp, cpname_state *state)
 	      got_dot = 1;
 	    else if (got_e && (p[-1] == 'e' || p[-1] == 'E')
 		     && (*p == '-' || *p == '+'))
-	      /* This is the sign of the exponent, not the end of the
-		 number.  */
-	      continue;
+	      {
+		/* This is the sign of the exponent, not the end of
+		   the number.  */
+	      }
+	    /* C++14 allows a separator.  */
+	    else if (*p == '\'')
+	      {
+		if (!no_tick.has_value ())
+		  no_tick.emplace (tokstart, p);
+		continue;
+	      }
 	    /* We will take any letters or digits.  parse_number will
 	       complain if past the radix, or if L or U are not final.  */
 	    else if (! ISALNUM (*p))
 	      break;
+	    if (no_tick.has_value ())
+	      no_tick->push_back (*p);
 	  }
-	toktype = state->parse_number (tokstart, p - tokstart, got_dot|got_e,
-				       lvalp);
+	if (no_tick.has_value ())
+	  toktype = state->parse_number (no_tick->c_str (),
+					 no_tick->length (),
+					 got_dot|got_e, lvalp);
+	else
+	  toktype = state->parse_number (tokstart, p - tokstart,
+					 got_dot|got_e, lvalp);
 	if (toktype == ERROR)
 	  {
 	    char *err_copy = (char *) alloca (p - tokstart + 1);
@@ -2045,6 +2064,8 @@ canonicalize_tests ()
   should_be_the_same ("x::y::z<0b111>", "x::y::z<7>");
   should_be_the_same ("x::y::z<0b111>", "x::y::z<0t7>");
   should_be_the_same ("x::y::z<0b111>", "x::y::z<0D7>");
+
+  should_be_the_same ("x::y::z<0xff'ff>", "x::y::z<65535>");
 }
 
 #endif
diff --git a/gdb/testsuite/gdb.cp/misc.exp b/gdb/testsuite/gdb.cp/misc.exp
index 264294f857d..bcb20f85eee 100644
--- a/gdb/testsuite/gdb.cp/misc.exp
+++ b/gdb/testsuite/gdb.cp/misc.exp
@@ -114,3 +114,7 @@ gdb_test "print *(number_ref + v_bool_array)" "\\$\[0-9\]* = false" \
     "integer reference addition with pointer"
 gdb_test "print *(v_bool_array - number_ref)" "\\$\[0-9\]* = false" \
     "pointer subtraction with integer reference"
+
+# C++14 digit separator.
+gdb_test "print 23'23" " = 2323"
+gdb_test "print 2'3.5" " = 23.5"

-- 
2.44.0



More information about the Gdb-patches mailing list