[PATCH 11/18] Add an emitter callback to generic_printstr and generic_emit_char

Wed Feb 16 17:47:29 GMT 2022

* Tom Tromey <tom@tromey.com> [2022-02-16 06:55:11 -0700]:

> This adds an emitter callback to generic_printstr and
> generic_emit_char, passing it through to print_wchar.  print_wchar is
> modified to call it, if possible.  This will be used to let languages
> override the way that escape sequences are emitted.  Nothing uses this
> yet, that comes later in the series.
> ---
>  gdb/valprint.c | 41 ++++++++++++++++++++++++++++-------------
>  gdb/valprint.h | 30 ++++++++++++++++++++++++++++--
>  2 files changed, 56 insertions(+), 15 deletions(-)
> 
> diff --git a/gdb/valprint.c b/gdb/valprint.c
> index 00c0cd2c72a..c0e5e678005 100644
> --- a/gdb/valprint.c
> +++ b/gdb/valprint.c
> @@ -2201,12 +2201,19 @@ print_wchar (gdb_wint_t w, const gdb_byte *orig,
>  	     int orig_len, int width,
>  	     enum bfd_endian byte_order,
>  	     struct obstack *output,
> -	     int quoter, bool *need_escapep)
> +	     int quoter, bool *need_escapep,
> +	     emit_char_ftype emitter)
>  {
>    bool need_escape = *need_escapep;
>  
>    *need_escapep = false;
>  
> +  obstack_wide_file file (output);
> +  if (emitter != nullptr
> +      && emitter (&file, w, gdb::make_array_view (orig, orig_len),
> +		  width, byte_order, quoter))

Rather than having the emitter called sometimes.  Did you consider
moving everything below this point into a separate function, say,
default_emit_char, then change the declarations to something like:

  extern void generic_emit_char (int c, struct type *type, struct ui_file *stream,
  			       int quoter, const char *encoding,
  			       emit_char_ftype emitter = default_emit_char);

Your emitter signature would need updating to not return a bool.

Language specific implementation would then do their own thing and/or
call default_emit_char as they saw fit.

I started prototyping this, to see what it might look like, and the
problem I ran into is that your emitter doesn't pass through the
need_escapep argument, which I think is a mistake.

As I understand it, the need_escapep allows us to handle the case you
fixed in patch #6 - printing things like "\0" "1" as "\000\061" - but
if this emitter is need to modify how escape sequences are printed,
then surely we're going to need to know this information, right?  I
haven't looked ahead yet, but, surely, there's at least the
possibility that a language might need to track if the last character
was an escape or not so it can avoid the same sort of issues?  Or what
if the language prints something as an escape, and then the default
code, for the next character, prints something that might merge with
the escape?

Thanks,
Andrew

> +    return;
> +
>    switch (w)
>      {
>        case LCST ('\a'):
> @@ -2246,7 +2253,6 @@ print_wchar (gdb_wint_t w, const gdb_byte *orig,
>  	  else
>  	    {
>  	      int i;
> -	      obstack_wide_file file (output);
>  
>  	      for (i = 0; i + width <= orig_len; i += width)
>  		{
> @@ -2281,7 +2287,8 @@ print_wchar (gdb_wint_t w, const gdb_byte *orig,
>  
>  void
>  generic_emit_char (int c, struct type *type, struct ui_file *stream,
> -		   int quoter, const char *encoding)
> +		   int quoter, const char *encoding,
> +		   emit_char_ftype emitter)
>  {
>    enum bfd_endian byte_order
>      = type_byte_order (type);
> @@ -2330,14 +2337,16 @@ generic_emit_char (int c, struct type *type, struct ui_file *stream,
>  	      for (i = 0; i < num_chars; ++i)
>  		print_wchar (chars[i], buf, buflen,
>  			     TYPE_LENGTH (type), byte_order,
> -			     &wchar_buf, quoter, &need_escape);
> +			     &wchar_buf, quoter, &need_escape,
> +			     emitter);
>  	    }
>  	}
>  
>        /* This handles the NUM_CHARS == 0 case as well.  */
>        if (print_escape)
>  	print_wchar (gdb_WEOF, buf, buflen, TYPE_LENGTH (type),
> -		     byte_order, &wchar_buf, quoter, &need_escape);
> +		     byte_order, &wchar_buf, quoter, &need_escape,
> +		     emitter);
>      }
>  
>    /* The output in the host encoding.  */
> @@ -2445,7 +2454,8 @@ print_converted_chars_to_obstack (struct obstack *obstack,
>  				  const std::vector<converted_character> &chars,
>  				  int quote_char, int width,
>  				  enum bfd_endian byte_order,
> -				  const struct value_print_options *options)
> +				  const struct value_print_options *options,
> +				  emit_char_ftype emitter)
>  {
>    unsigned int idx;
>    const converted_character *elem;
> @@ -2486,10 +2496,12 @@ print_converted_chars_to_obstack (struct obstack *obstack,
>  	      {
>  		if (elem->result == wchar_iterate_ok)
>  		  print_wchar (elem->chars[0], elem->buf, elem->buflen, width,
> -			       byte_order, obstack, quote_char, &need_escape);
> +			       byte_order, obstack, quote_char, &need_escape,
> +			       emitter);
>  		else
>  		  print_wchar (gdb_WEOF, elem->buf, elem->buflen, width,
> -			       byte_order, obstack, quote_char, &need_escape);
> +			       byte_order, obstack, quote_char, &need_escape,
> +			       emitter);
>  	      }
>  	  }
>  	  break;
> @@ -2514,10 +2526,12 @@ print_converted_chars_to_obstack (struct obstack *obstack,
>  	    obstack_grow_wstr (obstack, LCST ("'"));
>  	    if (elem->result == wchar_iterate_ok)
>  	      print_wchar (elem->chars[0], elem->buf, elem->buflen, width,
> -			   byte_order, obstack, quote_char, &need_escape);
> +			   byte_order, obstack, quote_char, &need_escape,
> +			   emitter);
>  	    else
>  	      print_wchar (gdb_WEOF, elem->buf, elem->buflen, width,
> -			   byte_order, obstack, quote_char, &need_escape);
> +			   byte_order, obstack, quote_char, &need_escape,
> +			   emitter);
>  	    obstack_grow_wstr (obstack, LCST ("'"));
>  	    std::string s = string_printf (_(" <repeats %u times>"),
>  					   elem->repeat_count);
> @@ -2543,7 +2557,7 @@ print_converted_chars_to_obstack (struct obstack *obstack,
>  	  /* Output the incomplete sequence string.  */
>  	  obstack_grow_wstr (obstack, LCST ("<incomplete sequence "));
>  	  print_wchar (gdb_WEOF, elem->buf, elem->buflen, width, byte_order,
> -		       obstack, 0, &need_escape);
> +		       obstack, 0, &need_escape, emitter);
>  	  obstack_grow_wstr (obstack, LCST (">"));
>  
>  	  /* We do not attempt to output anything after this.  */
> @@ -2602,7 +2616,8 @@ generic_printstr (struct ui_file *stream, struct type *type,
>  		  const gdb_byte *string, unsigned int length, 
>  		  const char *encoding, int force_ellipses,
>  		  int quote_char, int c_style_terminator,
> -		  const struct value_print_options *options)
> +		  const struct value_print_options *options,
> +		  emit_char_ftype emitter)
>  {
>    enum bfd_endian byte_order = type_byte_order (type);
>    unsigned int i;
> @@ -2678,7 +2693,7 @@ generic_printstr (struct ui_file *stream, struct type *type,
>  
>    /* Print the output string to the obstack.  */
>    print_converted_chars_to_obstack (&wchar_buf, converted_chars, quote_char,
> -				    width, byte_order, options);
> +				    width, byte_order, options, emitter);
>  
>    if (force_ellipses || !finished)
>      obstack_grow_wstr (&wchar_buf, LCST ("..."));
> diff --git a/gdb/valprint.h b/gdb/valprint.h
> index 0586836f9e6..64fea1ccb4a 100644
> --- a/gdb/valprint.h
> +++ b/gdb/valprint.h
> @@ -233,14 +233,40 @@ extern void generic_value_print (struct value *val, struct ui_file *stream,
>  				 const struct value_print_options *options,
>  				 const struct generic_val_print_decorations *d);
>  
> +/* A callback that can be used to print a representation of a wide
> +   character to a stream.
> +   
> +   If the character can be represented by this callback, it will
> +   return true.  A false return indicates that the default behavior
> +   should be taken -- for printable characters, the default is to emit
> +   it verbatim; for non-printable characters, C-style escapes are
> +   used.  Normally a callback should always return false for
> +   printable, non-control characters.
> +
> +   STREAM is the stream to write to.
> +   W is the character.  It might be gdb_WEOF, meaning an unconvertible
> +   sequence.
> +   ORIG is the original (target) bytes corresponding to W.
> +   WIDTH is the width of a base character in the encoding.
> +   BYTE_ORDER is the character type's byte order.
> +   QUOTER is the quote character used -- this is a host character.  */
> +typedef gdb::function_view<bool (ui_file *stream,
> +				 gdb_wint_t w,
> +				 gdb::array_view<const gdb_byte> orig,
> +				 int width,
> +				 enum bfd_endian byte_order,
> +				 int quoter)> emit_char_ftype;
> +
>  extern void generic_emit_char (int c, struct type *type, struct ui_file *stream,
> -			       int quoter, const char *encoding);
> +			       int quoter, const char *encoding,
> +			       emit_char_ftype emitter = nullptr);
>  
>  extern void generic_printstr (struct ui_file *stream, struct type *type, 
>  			      const gdb_byte *string, unsigned int length, 
>  			      const char *encoding, int force_ellipses,
>  			      int quote_char, int c_style_terminator,
> -			      const struct value_print_options *options);
> +			      const struct value_print_options *options,
> +			      emit_char_ftype emitter = nullptr);
>  
>  /* Run the "output" command.  ARGS and FROM_TTY are the usual
>     arguments passed to all command implementations, except ARGS is
> -- 
> 2.31.1
>