support C/C++ identifiers named with non-ASCII characters
張俊芝
zjz@zjz.name
Mon May 21 09:54:00 GMT 2018
Hello, team.
This patch fixes the bug at
https://sourceware.org/bugzilla/show_bug.cgi?id=22973 .
Here is how to test the patch:
Step 1. If you are using Clang or any other C compiler that has implemented
support for Unicode identifiers, create a C file with the following content:
int main(int 參量, char* 參[])
{
struct 集
{
int 數[3];
} 集 = {100, 200, 300};
int 序 = 2;
return 0;
}
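Or, if you are using GCC, create a C file with the following content as a
workaround (as of 2018 GCC still does not accept non-ASCII characters written
directly in identifiers, which is a pity, so the identifiers are spelled with
universal character names instead):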
int main(int \u53C3\u91CF, char* \u53C3[])
{
struct \u96C6
{
int \u6578[3];
} \u96C6 = {100, 200, 300};
int \u5E8F = 2;
return 0;
}
Step 2. Compile the C file with debugging information (e.g. with -g).
Step 3. Run GDB on the compiled executable and set a breakpoint on the "return 0" line.
Step 4. Run until the breakpoint.
Step 5. Test the following commands to see if they work:
p 參量
p 參
p 集
p 集.數
p 集.數[序]
Thanks for your review.
-------------- next part --------------
2018-05-20 張俊芝 <zjz@zjz.name>
* gdb/c-exp.y (is_identifier_separator): New function.
(lex_one_token): Now recognizes C and C++ Unicode identifiers by using
is_identifier_separator to determine the boundary of a token.
-------------- next part --------------
diff --git a/gdb/c-exp.y b/gdb/c-exp.y
index 5e10d2a3b4..b0dd6c7caf 100644
--- a/gdb/c-exp.y
+++ b/gdb/c-exp.y
@@ -73,6 +73,8 @@ void yyerror (const char *);
static int type_aggregate_p (struct type *);
+static bool is_identifier_separator (char);
+
%}
/* Although the yacc "value" of an expression is not used,
@@ -1718,6 +1720,53 @@ type_aggregate_p (struct type *type)
&& TYPE_DECLARED_CLASS (type)));
}
+/* While iterating all the characters in an identifier, an identifier separator
+ is a boundary where we know the iteration is done. */
+
+static bool
+is_identifier_separator (char c)
+{
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\0':
+ case '\'':
+ case '"':
+ case '\\':
+ case '(':
+ case ')':
+ case ',':
+ case '.':
+ case '+':
+ case '-':
+ case '*':
+ case '/':
+ case '|':
+ case '&':
+ case '^':
+ case '~':
+ case '!':
+ case '@':
+ case '[':
+ case ']':
+ /* '<' should not be a token separator, because it can be an open angle
+ bracket followed by a nested template identifier in C++. */
+ case '>':
+ case '?':
+ case ':':
+ case '=':
+ case '{':
+ case '}':
+ case ';':
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
/* Validate a parameter typelist. */
static void
@@ -1920,7 +1969,7 @@ parse_number (struct parser_state *par_state,
FIXME: This check is wrong; for example it doesn't find overflow
on 0x123456789 when LONGEST is 32 bits. */
if (c != 'l' && c != 'u' && n != 0)
- {
+ {
if ((unsigned_p && (ULONGEST) prevn >= (ULONGEST) n))
error (_("Numeric constant too large."));
}
@@ -2741,16 +2790,13 @@ lex_one_token (struct parser_state *par_state, bool *is_quoted_name)
}
}
- if (!(c == '_' || c == '$'
- || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
+ if (is_identifier_separator(c))
/* We must have come across a bad character (e.g. ';'). */
error (_("Invalid character '%c' in expression."), c);
/* It's a name. See how long it is. */
namelen = 0;
- for (c = tokstart[namelen];
- (c == '_' || c == '$' || (c >= '0' && c <= '9')
- || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '<');)
+ for (c = tokstart[namelen]; !is_identifier_separator(c);)
{
/* Template parameter lists are part of the name.
FIXME: This mishandles `print $a<4&&$a>3'. */
@@ -2932,7 +2978,7 @@ classify_name (struct parser_state *par_state, const struct block *block,
filename. However, if the name was quoted, then it is better
to check for a filename or a block, since this is the only
way the user has of requiring the extension to be used. */
- if ((is_a_field_of_this.type == NULL && !is_after_structop)
+ if ((is_a_field_of_this.type == NULL && !is_after_structop)
|| is_quoted_name)
{
/* See if it's a file name. */
More information about the Gdb-patches
mailing list