[PATCH] gdb/python: make more use of host_string_to_python_string

Andrew Burgess aburgess@redhat.com
Mon Dec 6 18:10:30 GMT 2021


We have a function host_string_to_python_string, which is a wrapper
around PyString_Decode, which, on Python 3, is an alias for
PyUnicode_Decode.

However, there are a few places where we still call PyUnicode_Decode
directly.

This commit replaces all uses of PyUnicode_Decode with calls to
host_string_to_python_string instead.

To make the use of host_string_to_python_string easier, I've added a
couple of overloads for this function in python-internal.h; these all
just forward their calls on to the single base implementation.  The
signatures of all host_string_to_python_string overloads are:

  gdbpy_ref<> host_string_to_python_string (const char *str, size_t len);
  gdbpy_ref<> host_string_to_python_string (const char *str);
  gdbpy_ref<> host_string_to_python_string (const string_file &str);

For Python 3, PyString_Decode is set up (in python-internal.h) as an
alias for PyUnicode_Decode, so there should certainly be no
user-visible changes in this case.

For Python 2 this commit will change the behaviour.  Previously by
calling PyUnicode_Decode we would have been returning a Unicode
object.  Now, after calling host_string_to_python_string, we will have
a str object.

I've checked the GDB documentation, and, as far as I can tell, the
methods I've touched were all documented as returning a string, or, in
the gdb.Command case, as taking a string as an argument, and my
understanding is that for Python 2, string generally means str.  So I
think the new behaviour is closer to what users would expect.

A different solution, that would also make things more consistent in
the Python 2 world, would be to have host_string_to_python_string
always return a Unicode object.  However, I've been reading this page:

  https://pythonhosted.org/kitchen/unicode-frustrations.html

Item #3 on that page recommends that Unicode strings should be
converted to str objects before being printed (in Python 2).  That
would mean that users should have been adding .encode() calls to the
output of the routines I've changed in this commit (if they wanted to
print that output), which is not something I think is made clear in
the GDB documentation.
---
 gdb/python/py-cmd.c          |  9 +++------
 gdb/python/py-frame.c        |  5 ++---
 gdb/python/py-type.c         |  3 +--
 gdb/python/py-utils.c        |  5 ++---
 gdb/python/py-value.c        |  4 ++--
 gdb/python/python-internal.h | 11 ++++++++++-
 gdb/python/python.c          |  4 ++--
 7 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/gdb/python/py-cmd.c b/gdb/python/py-cmd.c
index 94608e0bbcf..b2cafeba320 100644
--- a/gdb/python/py-cmd.c
+++ b/gdb/python/py-cmd.c
@@ -120,8 +120,7 @@ cmdpy_function (const char *args, int from_tty, cmd_list_element *command)
 
   if (! args)
     args = "";
-  gdbpy_ref<> argobj (PyUnicode_Decode (args, strlen (args), host_charset (),
-					NULL));
+  gdbpy_ref<> argobj = host_string_to_python_string (args);
   if (argobj == NULL)
     {
       gdbpy_print_stack ();
@@ -181,8 +180,7 @@ cmdpy_completer_helper (struct cmd_list_element *command,
       return NULL;
     }
 
-  gdbpy_ref<> textobj (PyUnicode_Decode (text, strlen (text), host_charset (),
-					 NULL));
+  gdbpy_ref<> textobj = host_string_to_python_string (text);
   if (textobj == NULL)
     error (_("Could not convert argument to Python string."));
 
@@ -194,8 +192,7 @@ cmdpy_completer_helper (struct cmd_list_element *command,
     }
   else
     {
-      wordobj.reset (PyUnicode_Decode (word, strlen (word), host_charset (),
-				       NULL));
+      wordobj = host_string_to_python_string (word);
       if (wordobj == NULL)
 	error (_("Could not convert argument to Python string."));
     }
diff --git a/gdb/python/py-frame.c b/gdb/python/py-frame.c
index ee57eb10576..b507ff0794f 100644
--- a/gdb/python/py-frame.c
+++ b/gdb/python/py-frame.c
@@ -131,8 +131,7 @@ frapy_name (PyObject *self, PyObject *args)
 
   if (name)
     {
-      result = PyUnicode_Decode (name.get (), strlen (name.get ()),
-				 host_charset (), NULL);
+      result = host_string_to_python_string (name.get ()).release ();
     }
   else
     {
@@ -658,7 +657,7 @@ gdbpy_frame_stop_reason_string (PyObject *self, PyObject *args)
     }
 
   str = unwind_stop_reason_to_string ((enum unwind_stop_reason) reason);
-  return PyUnicode_Decode (str, strlen (str), host_charset (), NULL);
+  return host_string_to_python_string (str).release ();
 }
 
 /* Implements the equality comparison for Frame objects.
diff --git a/gdb/python/py-type.c b/gdb/python/py-type.c
index 8b17b70fbe3..a178c6a4ab2 100644
--- a/gdb/python/py-type.c
+++ b/gdb/python/py-type.c
@@ -1033,8 +1033,7 @@ typy_str (PyObject *self)
       GDB_PY_HANDLE_EXCEPTION (except);
     }
 
-  return PyUnicode_Decode (thetype.c_str (), thetype.size (),
-			   host_charset (), NULL);
+  return host_string_to_python_string (thetype).release ();
 }
 
 /* Implement the richcompare method.  */
diff --git a/gdb/python/py-utils.c b/gdb/python/py-utils.c
index 10c4173efcd..2eb1ed2a09e 100644
--- a/gdb/python/py-utils.c
+++ b/gdb/python/py-utils.c
@@ -152,10 +152,9 @@ python_string_to_host_string (PyObject *obj)
 /* Convert a host string to a python string.  */
 
 gdbpy_ref<>
-host_string_to_python_string (const char *str)
+host_string_to_python_string (const char *str, size_t len)
 {
-  return gdbpy_ref<> (PyString_Decode (str, strlen (str), host_charset (),
-				       NULL));
+  return gdbpy_ref<> (PyString_Decode (str, len, host_charset (), nullptr));
 }
 
 /* Return true if OBJ is a Python string or unicode object, false
diff --git a/gdb/python/py-value.c b/gdb/python/py-value.c
index c843c2c3072..8bd30729454 100644
--- a/gdb/python/py-value.c
+++ b/gdb/python/py-value.c
@@ -743,7 +743,7 @@ valpy_format_string (PyObject *self, PyObject *args, PyObject *kw)
       GDB_PY_HANDLE_EXCEPTION (except);
     }
 
-  return PyUnicode_Decode (stb.c_str (), stb.size (), host_charset (), NULL);
+  return host_string_to_python_string (stb).release ();
 }
 
 /* A helper function that implements the various cast operators.  */
@@ -1149,7 +1149,7 @@ valpy_str (PyObject *self)
       GDB_PY_HANDLE_EXCEPTION (except);
     }
 
-  return PyUnicode_Decode (stb.c_str (), stb.size (), host_charset (), NULL);
+  return host_string_to_python_string (stb).release ();
 }
 
 /* Implements gdb.Value.is_optimized_out.  */
diff --git a/gdb/python/python-internal.h b/gdb/python/python-internal.h
index 211833e4b2d..195cb0a1896 100644
--- a/gdb/python/python-internal.h
+++ b/gdb/python/python-internal.h
@@ -22,6 +22,7 @@
 
 #include "extension.h"
 #include "extension-priv.h"
+#include "ui-file.h"
 
 /* These WITH_* macros are defined by the CPython API checker that
    comes with the Python plugin for GCC.  See:
@@ -715,7 +716,15 @@ gdb::unique_xmalloc_ptr<char> unicode_to_target_string (PyObject *unicode_str);
 gdb::unique_xmalloc_ptr<char> python_string_to_target_string (PyObject *obj);
 gdbpy_ref<> python_string_to_target_python_string (PyObject *obj);
 gdb::unique_xmalloc_ptr<char> python_string_to_host_string (PyObject *obj);
-gdbpy_ref<> host_string_to_python_string (const char *str);
+gdbpy_ref<> host_string_to_python_string (const char *str, size_t len);
+static inline gdbpy_ref<> host_string_to_python_string (const char *str)
+{
+  return host_string_to_python_string (str, strlen (str));
+}
+static inline gdbpy_ref<> host_string_to_python_string (const string_file &str)
+{
+  return host_string_to_python_string (str.c_str (), str.size ());
+}
 int gdbpy_is_string (PyObject *obj);
 gdb::unique_xmalloc_ptr<char> gdbpy_obj_to_string (PyObject *obj);
 
diff --git a/gdb/python/python.c b/gdb/python/python.c
index 82af012068b..6e85d30ed97 100644
--- a/gdb/python/python.c
+++ b/gdb/python/python.c
@@ -558,7 +558,7 @@ gdbpy_target_charset (PyObject *self, PyObject *args)
 {
   const char *cset = target_charset (python_gdbarch);
 
-  return PyUnicode_Decode (cset, strlen (cset), host_charset (), NULL);
+  return host_string_to_python_string (cset).release ();
 }
 
 /* Wrapper for target_wide_charset.  */
@@ -568,7 +568,7 @@ gdbpy_target_wide_charset (PyObject *self, PyObject *args)
 {
   const char *cset = target_wide_charset (python_gdbarch);
 
-  return PyUnicode_Decode (cset, strlen (cset), host_charset (), NULL);
+  return host_string_to_python_string (cset).release ();
 }
 
 /* A Python function which evaluates a string using the gdb CLI.  */
-- 
2.25.4



More information about the Gdb-patches mailing list