This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] vfprintf: Remove alloca from string formatting with conversion



On 19/06/2017 13:20, Florian Weimer wrote:
> 2017-06-19  Florian Weimer  <fweimer@redhat.com>
> 
> 	* stdio-common/vfprintf.c (done_add_func): New function.
> 	(done_add): Use it.
> 	(OTHER_CHAR_T, CONVERT_FROM_OTHER_STRING): New macros.
> 	(pad_func): New function.
> 	(PAD): Use it.
> 	(outstring_func): New function.
> 	(outstring): Use it.
> 	(outstring_converted_wide_string): New function.
> 	(process_arg): Use it.
> 
> diff --git a/stdio-common/Makefile b/stdio-common/Makefile
> index 309d83e..622a85f 100644
> --- a/stdio-common/Makefile
> +++ b/stdio-common/Makefile
> @@ -87,6 +87,7 @@ $(objpfx)tst-grouping.out: $(gen-locales)
>  $(objpfx)tst-sprintf.out: $(gen-locales)
>  $(objpfx)tst-sscanf.out: $(gen-locales)
>  $(objpfx)tst-swprintf.out: $(gen-locales)
> +$(objpfx)tst-vfprintf-mbs-prec.out: $(gen-locales)
>  endif
>  
>  tst-printf-bz18872-ENV = MALLOC_TRACE=$(objpfx)tst-printf-bz18872.mtrace
> diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c
> index b15c5d0..d58f0ea 100644
> --- a/stdio-common/vfprintf.c
> +++ b/stdio-common/vfprintf.c
> @@ -67,22 +67,37 @@
>      } while (0)
>  #define UNBUFFERED_P(S) ((S)->_IO_file_flags & _IO_UNBUFFERED)
>  
> -#define done_add(val) \
> -  do {									      \
> -    unsigned int _val = val;						      \
> -    assert ((unsigned int) done < (unsigned int) INT_MAX);		      \
> -    if (__glibc_unlikely (INT_MAX - done < _val))			      \
> -      {									      \
> -	done = -1;							      \
> -	 __set_errno (EOVERFLOW);					      \
> -	goto all_done;							      \
> -      }									      \
> -    done += _val;							      \
> -  } while (0)
> +/* Add LENGTH to DONE.  Return the new value of DONE, or -1 on
> +   overflow (and set errno accordingly).  */
> +static inline int
> +done_add_func (int length, int done)
> +{
> +  if (done < 0)
> +    return done;
> +  if (__glibc_unlikely (INT_MAX - done < length))
> +    {
> +      __set_errno (EOVERFLOW);
> +      return -1;
> +    }
> +  return done + length;
> +}

Wouldn't be better to add a wrapper on malloc/malloc-internal for overflow
check? Something like

static inline bool
check_add_overflow_int_t (int left, int right, int *result)
{
#if __GNUC__ >= 5
  return __builtin_add_overflow (left, right, result);
#else
  if (INT_MAX - left <= right)
    return true;
  *result = left + right;
  return false;
#endif
}

As we already do for multiplication and for unsigned (from my char_array)?


> +
> +#define done_add(val)							\
> +  do									\
> +    {									\
> +      /* Ensure that VAL has a type similar to int.  */			\
> +      _Static_assert (sizeof (val) == sizeof (int), "value int size");	\
> +      _Static_assert ((__typeof__ (val)) -1 < 0, "value signed");	\
> +      done = done_add_func ((val), done);				\
> +      if (done < 0)							\
> +	goto all_done;							\
> +    }									\
> +  while (0)
>  
>  #ifndef COMPILE_WPRINTF
>  # define vfprintf	_IO_vfprintf_internal
>  # define CHAR_T		char
> +# define OTHER_CHAR_T   wchar_t
>  # define UCHAR_T	unsigned char
>  # define INT_T		int
>  typedef const char *THOUSANDS_SEP_T;
> @@ -91,25 +106,14 @@ typedef const char *THOUSANDS_SEP_T;
>  # define STR_LEN(Str)	strlen (Str)
>  
>  # define PUT(F, S, N)	_IO_sputn ((F), (S), (N))
> -# define PAD(Padchar) \
> -  do {									      \
> -    if (width > 0)							      \
> -      {									      \
> -	_IO_ssize_t written = _IO_padn (s, (Padchar), width);		      \
> -	if (__glibc_unlikely (written != width))			      \
> -	  {								      \
> -	    done = -1;							      \
> -	    goto all_done;						      \
> -	  }								      \
> -	done_add (written);						      \
> -      }									      \
> -  } while (0)
>  # define PUTC(C, F)	_IO_putc_unlocked (C, F)
>  # define ORIENT		if (_IO_vtable_offset (s) == 0 && _IO_fwide (s, -1) != -1)\
>  			  return -1
> +# define CONVERT_FROM_OTHER_STRING __wcsrtombs
>  #else
>  # define vfprintf	_IO_vfwprintf
>  # define CHAR_T		wchar_t
> +# define OTHER_CHAR_T   char
>  /* This is a hack!!!  There should be a type uwchar_t.  */
>  # define UCHAR_T	unsigned int /* uwchar_t */
>  # define INT_T		wint_t
> @@ -121,21 +125,9 @@ typedef wchar_t THOUSANDS_SEP_T;
>  # include <_itowa.h>
>  
>  # define PUT(F, S, N)	_IO_sputn ((F), (S), (N))
> -# define PAD(Padchar) \
> -  do {									      \
> -    if (width > 0)							      \
> -      {									      \
> -	_IO_ssize_t written = _IO_wpadn (s, (Padchar), width);		      \
> -	if (__glibc_unlikely (written != width))			      \
> -	  {								      \
> -	    done = -1;							      \
> -	    goto all_done;						      \
> -	  }								      \
> -	done_add (written);						      \
> -      }									      \
> -  } while (0)
>  # define PUTC(C, F)	_IO_putwc_unlocked (C, F)
>  # define ORIENT		if (_IO_fwide (s, 1) != 1) return -1
> +# define CONVERT_FROM_OTHER_STRING __mbsrtowcs
>  
>  # undef _itoa
>  # define _itoa(Val, Buf, Base, Case) _itowa (Val, Buf, Base, Case)
> @@ -144,6 +136,33 @@ typedef wchar_t THOUSANDS_SEP_T;
>  # define EOF WEOF
>  #endif
>  
> +static inline int
> +pad_func (FILE *s, CHAR_T padchar, int width, int done)
> +{
> +  if (width > 0)
> +    {
> +      _IO_ssize_t written;
> +#ifndef COMPILE_WPRINTF
> +      written = _IO_padn (s, padchar, width);
> +#else
> +      written = _IO_wpadn (s, padchar, width);
> +#endif
> +      if (__glibc_unlikely (written != width))
> +	return -1;
> +      return done_add_func (width, done);
> +    }
> +  return done;
> +}
> +
> +#define PAD(Padchar)							\
> +  do									\
> +    {									\
> +      done = pad_func (s, (Padchar), width, done);			\
> +      if (done < 0)							\
> +	goto all_done;							\
> +    }									\
> +  while (0)
> +
>  #include "_i18n_number.h"
>  
>  /* Include the shared code for parsing the format string.  */
> @@ -163,25 +182,124 @@ typedef wchar_t THOUSANDS_SEP_T;
>      }									      \
>    while (0)
>  
> -#define outstring(String, Len)						      \
> -  do									      \
> -    {									      \
> -      assert ((size_t) done <= (size_t) INT_MAX);			      \
> -      if ((size_t) PUT (s, (String), (Len)) != (size_t) (Len))		      \
> -	{								      \
> -	  done = -1;							      \
> -	  goto all_done;						      \
> -	}								      \
> -      if (__glibc_unlikely (INT_MAX - done < (Len)))			      \
> -      {									      \
> -	done = -1;							      \
> -	 __set_errno (EOVERFLOW);					      \
> -	goto all_done;							      \
> -      }									      \
> -      done += (Len);							      \
> -    }									      \
> +static inline int
> +outstring_func (FILE *s, const UCHAR_T *string, size_t length, int done)
> +{
> +  assert ((size_t) done <= (size_t) INT_MAX);
> +  if ((size_t) PUT (s, string, length) != (size_t) (length))
> +    return -1;
> +  return done_add_func (length, done);
> +}
> +
> +#define outstring(String, Len)						\
> +  do									\
> +    {									\
> +      const void *string_ = (String);					\
> +      done = outstring_func (s, string_, (Len), done);			\
> +      if (done < 0)							\
> +	goto all_done;							\
> +    }									\
>    while (0)
>  
> +/* Write the string SRC to S.  If PREC is non-negative, write at most
> +   PREC bytes.  If LEFT is true, perform left justification.  */
> +static int
> +outstring_converted_wide_string (FILE *s, const OTHER_CHAR_T *src, int prec,
> +				 int width, bool left, int done)
> +{
> +  mbstate_t mbstate;
> +
> +  /* Use a small buffer to combine processing of multiple characters.
> +     CONVERT_FROM_OTHER_STRING expects the buffer size in (wide)
> +     characters, and buf_length counts that.  */
> +#ifdef COMPILE_WPRINTF
> +  enum { buf_length = 64 };
> +  wchar_t buf[buf_length];
> +#else
> +  enum { buf_length = 256 };
> +  char buf[buf_length];
> +  _Static_assert (sizeof (buf) > MB_LEN_MAX,
> +		  "buffer is large enough for a single multi-byte character");
> +#endif

Why different 'buf_length' sizes if you using the required type for array
creation?

> +
> +  /* Add the initial padding if needed.  */
> +  if (width > 0 && !left)
> +    {
> +      /* Make a first pass to find the output width, so that we can
> +	 add the required padding.  */
> +      memset (&mbstate, 0, sizeof (mbstate_t));
> +      const OTHER_CHAR_T *src_copy = src;
> +      size_t total_written;
> +      if (prec < 0)
> +	total_written = CONVERT_FROM_OTHER_STRING
> +	  (NULL, &src_copy, 0, &mbstate);
> +      else
> +	{
> +	  /* The source might not be null-terminated.  Enforce the
> +	     limit manually, based on the output length.  */
> +	  total_written = 0;
> +	  size_t limit = prec;
> +	  while (limit > 0 && src_copy != NULL)

I think this comparison using limit as size_t will always be valid,
shouldn't 'limit' be a 'int'?

> +	    {
> +	      size_t write_limit = buf_length;
> +	      if (write_limit > limit)
> +		write_limit = limit;
> +	      size_t written = CONVERT_FROM_OTHER_STRING
> +		(buf, &src_copy, write_limit, &mbstate);
> +	      if (written == (size_t) -1)
> +		return -1;
> +	      if (written == 0)
> +		break;
> +	      total_written += written;
> +	      limit -= written;
> +	    }
> +	}
> +
> +      /* Output initial padding.  */
> +      if (total_written < width)
> +	{
> +	  done = pad_func (s, L_(' '), width - total_written, done);
> +	  if (done < 0)
> +	    return done;
> +	}
> +    }
> +
> +  /* Convert the input string, piece by piece.  */
> +  memset (&mbstate, 0, sizeof (mbstate_t));
> +  size_t total_written = 0;
> +  {
> +    /* If prec is negative, remaining is not decremented, otherwise,
> +       it serves as the write limit.  */
> +    size_t remaining = -1;
> +    if (prec >= 0)
> +      remaining = prec;
> +    while (remaining > 0 && src != NULL)

As before, shouldn't 'remaining' be a signed type as well?

> +      {
> +	size_t write_limit = buf_length;
> +	if (remaining < write_limit)
> +	  write_limit = remaining;
> +	size_t written = CONVERT_FROM_OTHER_STRING
> +	  (buf, &src, write_limit, &mbstate);
> +	if (written == (size_t) -1)
> +	  return -1;
> +	if (written == 0)
> +	  break;
> +	done = outstring_func (s, (const UCHAR_T *) buf, written, done);
> +	if (done < 0)
> +	  return done;
> +	total_written += written;
> +	if (prec >= 0)
> +	  remaining -= written;
> +      }
> +  }
> +
> +  /* Add final padding.  */
> +  if (width > 0 && left && total_written < width)
> +    return pad_func (s, L_(' '), width - total_written, done);
> +  else
> +    return done;
> +}
> +
>  /* For handling long_double and longlong we use the same flag.  If
>     `long' and `long long' are effectively the same type define it to
>     zero.  */
> @@ -978,7 +1096,6 @@ static const uint8_t jump_table[] =
>      LABEL (form_string):						      \
>        {									      \
>  	size_t len;							      \
> -	int string_malloced;						      \
>  									      \
>  	/* The string argument could in fact be `char *' or `wchar_t *'.      \
>  	   But this should not make a difference here.  */		      \
> @@ -990,7 +1107,6 @@ static const uint8_t jump_table[] =
>  	/* Entry point for printing other strings.  */			      \
>        LABEL (print_string):						      \
>  									      \
> -	string_malloced = 0;						      \
>  	if (string == NULL)						      \
>  	  {								      \
>  	    /* Write "(null)" if there's space.  */			      \
> @@ -1008,41 +1124,12 @@ static const uint8_t jump_table[] =
>  	  }								      \
>  	else if (!is_long && spec != L_('S'))				      \
>  	  {								      \
> -	    /* This is complicated.  We have to transform the multibyte	      \
> -	       string into a wide character string.  */			      \
> -	    const char *mbs = (const char *) string;			      \
> -	    mbstate_t mbstate;						      \
> -									      \
> -	    len = prec != -1 ? __strnlen (mbs, (size_t) prec) : strlen (mbs); \
> -									      \
> -	    /* Allocate dynamically an array which definitely is long	      \
> -	       enough for the wide character version.  Each byte in the	      \
> -	       multi-byte string can produce at most one wide character.  */  \
> -	    if (__glibc_unlikely (len > SIZE_MAX / sizeof (wchar_t)))	      \
> -	      {								      \
> -		__set_errno (EOVERFLOW);				      \
> -		done = -1;						      \
> -		goto all_done;						      \
> -	      }								      \
> -	    else if (__libc_use_alloca (len * sizeof (wchar_t)))	      \
> -	      string = (CHAR_T *) alloca (len * sizeof (wchar_t));	      \
> -	    else if ((string = (CHAR_T *) malloc (len * sizeof (wchar_t)))    \
> -		     == NULL)						      \
> -	      {								      \
> -		done = -1;						      \
> -		goto all_done;						      \
> -	      }								      \
> -	    else							      \
> -	      string_malloced = 1;					      \
> -									      \
> -	    memset (&mbstate, '\0', sizeof (mbstate_t));		      \
> -	    len = __mbsrtowcs (string, &mbs, len, &mbstate);		      \
> -	    if (len == (size_t) -1)					      \
> -	      {								      \
> -		/* Illegal multibyte character.  */			      \
> -		done = -1;						      \
> -		goto all_done;						      \
> -	      }								      \
> +	    done = outstring_converted_wide_string			      \
> +	      (s, (const char *) string, prec, width, left, done);	      \
> +	    if (done < 0)						      \
> +	      goto all_done;						      \
> +	    /* The padding has already been written.  */		      \
> +	    break;							      \
>  	  }								      \
>  	else								      \
>  	  {								      \
> @@ -1065,8 +1152,6 @@ static const uint8_t jump_table[] =
>  	outstring (string, len);					      \
>  	if (left)							      \
>  	  PAD (L' ');							      \
> -	if (__glibc_unlikely (string_malloced))				      \
> -	  free (string);						      \
>        }									      \
>        break;
>  #else
> @@ -1115,7 +1200,6 @@ static const uint8_t jump_table[] =
>      LABEL (form_string):						      \
>        {									      \
>  	size_t len;							      \
> -	int string_malloced;						      \
>  									      \
>  	/* The string argument could in fact be `char *' or `wchar_t *'.      \
>  	   But this should not make a difference here.  */		      \
> @@ -1127,7 +1211,6 @@ static const uint8_t jump_table[] =
>  	/* Entry point for printing other strings.  */			      \
>        LABEL (print_string):						      \
>  									      \
> -	string_malloced = 0;						      \
>  	if (string == NULL)						      \
>  	  {								      \
>  	    /* Write "(null)" if there's space.  */			      \
> @@ -1153,51 +1236,12 @@ static const uint8_t jump_table[] =
>  	  }								      \
>  	else								      \
>  	  {								      \
> -	    const wchar_t *s2 = (const wchar_t *) string;		      \
> -	    mbstate_t mbstate;						      \
> -									      \
> -	    memset (&mbstate, '\0', sizeof (mbstate_t));		      \
> -									      \
> -	    if (prec >= 0)						      \
> -	      {								      \
> -		/* The string `s2' might not be NUL terminated.  */	      \
> -		if (__libc_use_alloca (prec))				      \
> -		  string = (char *) alloca (prec);			      \
> -		else if ((string = (char *) malloc (prec)) == NULL)	      \
> -		  {							      \
> -		    done = -1;						      \
> -		    goto all_done;					      \
> -		  }							      \
> -		else							      \
> -		  string_malloced = 1;					      \
> -		len = __wcsrtombs (string, &s2, prec, &mbstate);	      \
> -	      }								      \
> -	    else							      \
> -	      {								      \
> -		len = __wcsrtombs (NULL, &s2, 0, &mbstate);		      \
> -		if (len != (size_t) -1)					      \
> -		  {							      \
> -		    assert (__mbsinit (&mbstate));			      \
> -		    s2 = (const wchar_t *) string;			      \
> -		    if (__libc_use_alloca (len + 1))			      \
> -		      string = (char *) alloca (len + 1);		      \
> -		    else if ((string = (char *) malloc (len + 1)) == NULL)    \
> -		      {							      \
> -			done = -1;					      \
> -			goto all_done;					      \
> -		      }							      \
> -		    else						      \
> -		      string_malloced = 1;				      \
> -		    (void) __wcsrtombs (string, &s2, len + 1, &mbstate);      \
> -		  }							      \
> -	      }								      \
> -									      \
> -	    if (len == (size_t) -1)					      \
> -	      {								      \
> -		/* Illegal wide-character string.  */			      \
> -		done = -1;						      \
> -		goto all_done;						      \
> -	      }								      \
> +	    done = outstring_converted_wide_string			      \
> +	      (s, (const wchar_t *) string, prec, width, left, done);	      \
> +	    if (done < 0)						      \
> +	      goto all_done;						      \
> +	    /* The padding has already been written.  */		      \
> +	    break;							      \
>  	  }								      \
>  									      \
>  	if ((width -= len) < 0)						      \
> @@ -1211,8 +1255,6 @@ static const uint8_t jump_table[] =
>  	outstring (string, len);					      \
>  	if (left)							      \
>  	  PAD (' ');							      \
> -	if (__glibc_unlikely (string_malloced))			              \
> -	  free (string);						      \
>        }									      \
>        break;
>  #endif
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]