[PATCH] gdb: consider null terminator for length arguments of value_cstring calls
Pedro Alves
pedro@palves.net
Thu Jul 15 14:29:45 GMT 2021
On 2021-07-14 2:50 p.m., Simon Marchi wrote:
>> For C, it makes sence to me to return a null terminated string. Though I wonder
>> whether the string you get should depend on language? I mean, see this:
>>
>> (gdb) set language c
>> (gdb) ptype "foo"
>> type = char [4]
>> (gdb) set language ada
>> (gdb) ptype "foo"
>> type = array (1 .. 3) of character
>>
>> Whatever we decide, I think we should update the manual to make it explicit.
>
> Agreed, I think the result of:
>
> (gdb) ptype "foo"
>
> should be the same as
>
> (gdb) set args foo
> (gdb) ptype $_gdb_setting("args")
>
> for every language.
>
> For Ada though, I get:
...
> Let's pretend I didn't see this and try with a non-Asan GDB:
>
> $ ./gdb -q -nx --data-directory=data-directory -ex "set language ada" -ex "set args foo" -ex 'ptype $_gdb_setting("args")' -ex 'print $_gdb_setting("args")' -batch
> type = <4-byte integer>
> $1 = (102, 111, 111)
>
> $ ./gdb -q -nx --data-directory=data-directory -ex "set language ada" -ex "set args foo" -ex 'ptype $_gdb_setting("args")' -ex 'print $_gdb_setting("args")' -batch
> type = <4-byte integer>
> $1 = (102, 111, 111, 0)
>
> Not sure what the 4-byte integer refers to. But in any case, the result
> doesn't look good.
That "4-byte integer" thing is a red herring and a known issue -- it also happens in C:
$ $g -q -nx -ex "set language c" -ex "set args foo" -ex 'ptype $_gdb_setting("args")' -ex 'print $_gdb_setting("args")' -batch
type = int
$1 = "foo"
See settings.exp:
# Verifies that $_gdb_setting (SETTING) gives a value whose ptype matches EXPECTED.
proc check_type {setting expected} {
with_test_prefix "check_type $setting $expected" {
gdb_test "print \$_gdb_maint_setting(\"$setting\")"
gdb_test "ptype $" "$expected"
# Currently, GDB ptype <internal function> always tells it is type int.
# ptype should better report an error such as:
# "No type information for GDB functions"
# Test 'type int', so as to make it fail if ptype is changed.
gdb_test "ptype \$_gdb_maint_setting(\"$setting\")" \
"type = int"
}
}
So instead, try (current master):
$ ./gdb -q -nx --data-directory=data-directory -ex "set language c" -ex "set args foo" -ex 'print $_gdb_setting("args")' -ex 'ptype $' -batch
$1 = "foo"
type = char [3]
>
> The created type (using value_cstring vs value_string) should probably
> depend on the language. I tried swapping value_cstring for
> value_string, but that wasn't enough. So, it seems there's work to do,
> but I don't think I'm the right person to do it as I don't know much
> about Ada (and don't have much time to dig into it).
I think what we need is a language hook to return a string struct value from
a const char * / len. For C, the implementation would use value_cstring,
for Ada it would use value_string. The testcase can cycle through
all languages and make sure that "print/ptype "foo" and
print/ptype \$_gdb_maint_setting return the same thing.
I gave this a try, see patch below, on top of your v2. I've also put it in the
users/palves/value_string branch on sourceware.
Note I went the other direction and made value_cstring work with characters instead of
bytes, and made it add the \0 itself. Thus I undid the guile change to use nullptr
instead of &len, as well as the strlen()+1 changes.
After this, the only value_cstring calls left are in the C parser, and in the
language method:
$ grep value_cstring -rn
value.h:792:extern struct value *value_cstring (const char *ptr, ssize_t len,
c-lang.c:680: result = value_cstring ((const char *) obstack_base (&output),
valops.c:1753:value_cstring (const char *ptr, ssize_t len, struct type *char_type)
language.c:985: return value_cstring (ptr, len, type);
Note how the the current python and guile code were already depending on
the language, but in an odd way. E.g., in Python:
-#define builtin_type_pychar \
- language_string_char_type (python_language, python_gdbarch)
- value = value_cstring (s.get (), strlen (s.get ()) + 1,
- builtin_type_pychar);
I don't know what that was supposed to do when the language was not C?
The test runs into a few pre-existing oddities:
(gdb) set language modula-2
(gdb) p "foo"
strings are not implemented
(gdb) p $_gdb_maint_setting("test-settings string")
',' or ')' expected
(gdb) set language unknown
(gdb) p "foo"
Aborted (core dumped)
I have no idea how rust one is just weird, but then again if parsing worked, I don't
think the call would work anyway, since Rust strings are a {ptr,len}
aggregate, not a plain char array.
I filed PR gdb/28093 for the crash.
>From 50991aaf22b03a9925ae37155f16bc8257f95242 Mon Sep 17 00:00:00 2001
From: Pedro Alves <pedro@palves.net>
Date: Thu, 15 Jul 2021 10:47:56 +0100
Subject: [PATCH] all languages
Change-Id: I79ef914087dbf85e1cbc19263843a6034383afe7
---
gdb/ada-lang.c | 10 +++
gdb/c-lang.c | 12 ++-
gdb/cli/cli-cmds.c | 22 +++--
gdb/f-lang.c | 8 ++
gdb/f-lang.h | 3 +
gdb/guile/scm-math.c | 8 +-
gdb/language.c | 9 +++
gdb/language.h | 7 ++
gdb/python/py-value.c | 8 +-
.../gdb.base/internal-string-values.exp | 80 +++++++++++++++++++
gdb/valops.c | 7 +-
gdb/value.c | 5 +-
gdb/value.h | 8 +-
13 files changed, 152 insertions(+), 35 deletions(-)
diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c
index b098991612d..82319e49326 100644
--- a/gdb/ada-lang.c
+++ b/gdb/ada-lang.c
@@ -12900,6 +12900,16 @@ class ada_language : public language_defn
return language_defn::read_var_value (var, var_block, frame);
}
+ struct value *value_string (struct gdbarch *gdbarch,
+ const char *ptr, ssize_t len) const override
+ {
+ struct type *type = language_string_char_type (this, gdbarch);
+ value *val = ::value_string (ptr, len, type);
+ /* See ada_string_operation::evaluate. */
+ value_type (val)->set_code (TYPE_CODE_ARRAY);
+ return val;
+ }
+
/* See language.h. */
void language_arch_info (struct gdbarch *gdbarch,
struct language_arch_info *lai) const override
diff --git a/gdb/c-lang.c b/gdb/c-lang.c
index 98f4984b020..d30cebe9f0c 100644
--- a/gdb/c-lang.c
+++ b/gdb/c-lang.c
@@ -653,16 +653,11 @@ c_string_operation::evaluate (struct type *expect_type,
}
else
{
- int i;
-
- /* Write the terminating character. */
- for (i = 0; i < TYPE_LENGTH (type); ++i)
- obstack_1grow (&output, 0);
+ int element_size = TYPE_LENGTH (type);
if (satisfy_expected)
{
LONGEST low_bound, high_bound;
- int element_size = TYPE_LENGTH (type);
if (!get_discrete_bounds (expect_type->index_type (),
&low_bound, &high_bound))
@@ -677,10 +672,13 @@ c_string_operation::evaluate (struct type *expect_type,
result = allocate_value (expect_type);
memcpy (value_contents_raw (result), obstack_base (&output),
obstack_object_size (&output));
+ /* Write the terminating character. */
+ memset (value_contents_raw (result) + obstack_object_size (&output),
+ 0, element_size);
}
else
result = value_cstring ((const char *) obstack_base (&output),
- obstack_object_size (&output),
+ obstack_object_size (&output) / element_size,
type);
}
return result;
diff --git a/gdb/cli/cli-cmds.c b/gdb/cli/cli-cmds.c
index f6fc5ab854f..b445b75fae2 100644
--- a/gdb/cli/cli-cmds.c
+++ b/gdb/cli/cli-cmds.c
@@ -2146,12 +2146,10 @@ value_from_setting (const cmd_list_element *cmd, struct gdbarch *gdbarch)
case var_filename:
case var_enum:
if (*(char **) cmd->var)
- return value_cstring (*(char **) cmd->var,
- strlen (*(char **) cmd->var) + 1,
- builtin_type (gdbarch)->builtin_char);
+ return current_language->value_string (gdbarch, *(char **) cmd->var,
+ strlen (*(char **) cmd->var));
else
- return value_cstring ("", 1,
- builtin_type (gdbarch)->builtin_char);
+ return current_language->value_string (gdbarch, "", 0);
default:
gdb_assert_not_reached ("bad var_type");
}
@@ -2199,8 +2197,9 @@ str_value_from_setting (const cmd_list_element *cmd, struct gdbarch *gdbarch)
{
std::string cmd_val = get_setshow_command_value_string (cmd);
- return value_cstring (cmd_val.c_str (), cmd_val.size () + 1,
- builtin_type (gdbarch)->builtin_char);
+ return current_language->value_string (gdbarch,
+ cmd_val.data (),
+ cmd_val.size ());
}
case var_string:
@@ -2213,12 +2212,11 @@ str_value_from_setting (const cmd_list_element *cmd, struct gdbarch *gdbarch)
escaping quotes. So, we directly use the cmd->var string value,
similarly to the value_from_setting code for these cases. */
if (*(char **) cmd->var)
- return value_cstring (*(char **) cmd->var,
- strlen (*(char **) cmd->var) + 1,
- builtin_type (gdbarch)->builtin_char);
+ return current_language->value_string (gdbarch,
+ *(char **) cmd->var,
+ strlen (*(char **) cmd->var));
else
- return value_cstring ("", 1,
- builtin_type (gdbarch)->builtin_char);
+ return current_language->value_string (gdbarch, "", 0);
default:
gdb_assert_not_reached ("bad var_type");
diff --git a/gdb/f-lang.c b/gdb/f-lang.c
index 16ec9e04044..869a95d4304 100644
--- a/gdb/f-lang.c
+++ b/gdb/f-lang.c
@@ -103,6 +103,14 @@ f_language::get_encoding (struct type *type)
+struct value *
+f_language::value_string (struct gdbarch *gdbarch,
+ const char *ptr, ssize_t len) const
+{
+ struct type *type = language_string_char_type (this, gdbarch);
+ return ::value_string (ptr, len, type);
+}
+
/* A helper function for the "bound" intrinsics that checks that TYPE
is an array. LBOUND_P is true for lower bound; this is used for
the error message, if any. */
diff --git a/gdb/f-lang.h b/gdb/f-lang.h
index 1ccdd3978ea..1306b135779 100644
--- a/gdb/f-lang.h
+++ b/gdb/f-lang.h
@@ -193,6 +193,9 @@ class f_language : public language_defn
&& TYPE_TARGET_TYPE (type)->code () == TYPE_CODE_CHAR));
}
+ struct value *value_string (struct gdbarch *gdbarch,
+ const char *ptr, ssize_t len) const override;
+
/* See language.h. */
const char *struct_too_deep_ellipsis () const override
diff --git a/gdb/guile/scm-math.c b/gdb/guile/scm-math.c
index 8b7fe9b69ec..a88c805fc23 100644
--- a/gdb/guile/scm-math.c
+++ b/gdb/guile/scm-math.c
@@ -776,6 +776,8 @@ vlscm_convert_typed_value_from_scheme (const char *func_name,
}
else if (scm_is_string (obj))
{
+ size_t len;
+
if (type != NULL)
{
except_scm = gdbscm_make_misc_error (func_name, type_arg_pos,
@@ -787,14 +789,12 @@ vlscm_convert_typed_value_from_scheme (const char *func_name,
{
/* TODO: Provide option to specify conversion strategy. */
gdb::unique_xmalloc_ptr<char> s
- = gdbscm_scm_to_string (obj, nullptr,
+ = gdbscm_scm_to_string (obj, &len,
target_charset (gdbarch),
0 /*non-strict*/,
&except_scm);
if (s != NULL)
- value = value_cstring (s.get (), strlen (s.get ()) + 1,
- language_string_char_type (language,
- gdbarch));
+ value = language->value_string (gdbarch, s.get (), len);
else
value = NULL;
}
diff --git a/gdb/language.c b/gdb/language.c
index 0d1e3848de8..2ee32a1edff 100644
--- a/gdb/language.c
+++ b/gdb/language.c
@@ -47,6 +47,7 @@
#include <algorithm>
#include "gdbarch.h"
#include "compile/compile-internal.h"
+#include "arch-utils.h"
static void set_range_case (void);
@@ -976,6 +977,14 @@ language_string_char_type (const struct language_defn *la,
return ld->arch_info[la->la_language].string_char_type ();
}
+struct value *
+language_defn::value_string (struct gdbarch *gdbarch,
+ const char *ptr, ssize_t len) const
+{
+ struct type *type = language_string_char_type (this, gdbarch);
+ return value_cstring (ptr, len, type);
+}
+
/* See language.h. */
struct type *
diff --git a/gdb/language.h b/gdb/language.h
index 21ed47b3580..87064962b9b 100644
--- a/gdb/language.h
+++ b/gdb/language.h
@@ -579,6 +579,13 @@ struct language_defn
virtual char string_lower_bound () const
{ return c_style_arrays_p () ? 0 : 1; }
+ /* Return the LEN characters long string at STR as a value as
+ represented in this language. GDBARCH is used to infer the
+ character type. The default implementation returns a
+ null-terminated C string. */
+ virtual struct value *value_string (struct gdbarch *gdbarch,
+ const char *ptr, ssize_t len) const;
+
/* Returns true if the symbols names should be stored in GDB's data
structures for minimal/partial/full symbols using their linkage (aka
mangled) form; false if the symbol names should be demangled first.
diff --git a/gdb/python/py-value.c b/gdb/python/py-value.c
index bc0f88b7a16..f5b72446912 100644
--- a/gdb/python/py-value.c
+++ b/gdb/python/py-value.c
@@ -50,9 +50,6 @@
#define builtin_type_pybool \
language_bool_type (python_language, python_gdbarch)
-#define builtin_type_pychar \
- language_string_char_type (python_language, python_gdbarch)
-
struct value_object {
PyObject_HEAD
struct value_object *next;
@@ -1907,8 +1904,9 @@ convert_value_from_python (PyObject *obj)
gdb::unique_xmalloc_ptr<char> s
= python_string_to_target_string (obj);
if (s != NULL)
- value = value_cstring (s.get (), strlen (s.get ()) + 1,
- builtin_type_pychar);
+ value = python_language->value_string (python_gdbarch,
+ s.get (),
+ strlen (s.get ()));
}
else if (PyObject_TypeCheck (obj, &value_object_type))
value = value_copy (((value_object *) obj)->value);
diff --git a/gdb/testsuite/gdb.base/internal-string-values.exp b/gdb/testsuite/gdb.base/internal-string-values.exp
index 0748ab0984c..3380d9d8077 100644
--- a/gdb/testsuite/gdb.base/internal-string-values.exp
+++ b/gdb/testsuite/gdb.base/internal-string-values.exp
@@ -118,6 +118,86 @@ proc_with_prefix test_setting { } {
gdb_test_no_output {set $v = $_gdb_setting_str("remotetimeout")}
check_v_string "2"
}
+
+ # Test string values made by $_gdb_setting & co. in all languages.
+ with_test_prefix "all langs" {
+ # Get list of supported languages.
+ set langs {}
+ gdb_test_multiple "complete set language " "" {
+ -re "set language (\[^\r\n\]+)" {
+ lappend langs $expect_out(1,string)
+ exp_continue
+ }
+ -re "$::gdb_prompt $" {
+ pass $gdb_test_name
+ }
+ }
+
+ proc quote {str} {
+ upvar lang lang
+
+ if {$lang == "fortran"} {
+ return "'$str'"
+ } else {
+ return "\"$str\""
+ }
+ }
+
+ gdb_test "maintenance set test-settings string foo"
+ foreach_with_prefix lang $langs {
+ gdb_test_no_output "set language $lang"
+
+ if {$lang == "modula-2"} {
+ gdb_test "print \"foo\"" "strings are not implemented"
+ continue
+ }
+
+ if {$lang == "rust"} {
+ gdb_test "print \"foo\"" \
+ "evaluation of this expression requires the target program to be active"
+
+ # We could get the above working by starting the
+ # program, but how to make the below work?
+ gdb_test "print \$_gdb_maint_setting(\"test-settings string\")" \
+ "',' or '\\)' expected"
+ continue
+ }
+
+ if {$lang == "unknown"} {
+ # Skipped because of PR gdb/28093 -- GDB would crash.
+ continue
+ }
+
+ set print_output ""
+ set ptype_output ""
+
+ set foo_str [quote foo]
+ gdb_test_multiple "print $foo_str" "" {
+ -wrap -re " = (.*)" {
+ set print_output $expect_out(1,string)
+ pass $gdb_test_name
+ }
+ }
+
+ gdb_test_multiple "ptype $foo_str" "" {
+ -wrap -re " = (.*)" {
+ set ptype_output $expect_out(1,string)
+ pass $gdb_test_name
+ }
+ }
+
+ set cmd_str [quote "test-settings string"]
+ set ptype_output_re [string_to_regexp $ptype_output]
+ set print_output_re [string_to_regexp $print_output]
+
+ foreach_with_prefix conv_func {$_gdb_maint_setting $_gdb_maint_setting_str} {
+ gdb_test "print ${conv_func}($cmd_str)" \
+ " = $print_output_re"
+ gdb_test "ptype \$" \
+ " = $ptype_output_re"
+ }
+ }
+ }
}
# Test with a string value created by gdb.Value in Python.
diff --git a/gdb/valops.c b/gdb/valops.c
index f67612e87b0..161b8696cb2 100644
--- a/gdb/valops.c
+++ b/gdb/valops.c
@@ -1754,12 +1754,15 @@ value_cstring (const char *ptr, ssize_t len, struct type *char_type)
{
struct value *val;
int lowbound = current_language->string_lower_bound ();
- ssize_t highbound = len / TYPE_LENGTH (char_type);
+ ssize_t highbound = len + 1;
struct type *stringtype
= lookup_array_range_type (char_type, lowbound, highbound + lowbound - 1);
val = allocate_value (stringtype);
- memcpy (value_contents_raw (val), ptr, len);
+ memcpy (value_contents_raw (val), ptr, len * TYPE_LENGTH (char_type));
+ /* Write the terminating character. */
+ memset (value_contents_raw (val) + len * TYPE_LENGTH (char_type),
+ 0, TYPE_LENGTH (char_type));
return val;
}
diff --git a/gdb/value.c b/gdb/value.c
index db34d2e5762..092325c8731 100644
--- a/gdb/value.c
+++ b/gdb/value.c
@@ -2188,8 +2188,9 @@ value_of_internalvar (struct gdbarch *gdbarch, struct internalvar *var)
break;
case INTERNALVAR_STRING:
- val = value_cstring (var->u.string, strlen (var->u.string) + 1,
- builtin_type (gdbarch)->builtin_char);
+ val = current_language->value_string (gdbarch,
+ var->u.string,
+ strlen (var->u.string));
break;
case INTERNALVAR_VALUE:
diff --git a/gdb/value.h b/gdb/value.h
index 24392437ed0..3313b9fd346 100644
--- a/gdb/value.h
+++ b/gdb/value.h
@@ -782,10 +782,12 @@ class scoped_value_mark
const struct value *m_value;
};
-/* Create a not_lval value with type TYPE_CODE_ARRAY.
+/* Create not_lval value representing a NULL-terminated C string. The
+ resulting value has type TYPE_CODE_ARRAY.
- PTR points to the string data; LEN is the length of the string including the
- NULL terminator. */
+ PTR points to the string data; LEN is number of characters (does
+ not include the NULL terminator). CHAR_TYPE is the character
+ type. */
extern struct value *value_cstring (const char *ptr, ssize_t len,
struct type *char_type);
base-commit: f9e5d80cf75c4c549c392b6bb1bd33e103824657
prerequisite-patch-id: 77de8c2c15ea6664d089023bd3332da3e23913c0
--
2.26.2
More information about the Gdb-patches
mailing list