This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] vfprintf: Remove alloca from string formatting with conversion
- From: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
- To: libc-alpha at sourceware dot org
- Date: Tue, 27 Jun 2017 17:32:43 -0300
- Subject: Re: [PATCH] vfprintf: Remove alloca from string formatting with conversion
- Authentication-results: sourceware.org; auth=none
- References: <20170619162034.8F1FA402AEC0E@oldenburg.str.redhat.com>
On 19/06/2017 13:20, Florian Weimer wrote:
> 2017-06-19 Florian Weimer <fweimer@redhat.com>
>
> * stdio-common/vfprintf.c (done_add_func): New function.
> (done_add): Use it.
> (OTHER_CHAR_T, CONVERT_FROM_OTHER_STRING): New macros.
> (pad_func): New function.
> (PAD): Use it.
> (outstring_func): New function.
> (outstring): Use it.
> (outstring_converted_wide_string): New function.
> (process_arg): Use it.
>
> diff --git a/stdio-common/Makefile b/stdio-common/Makefile
> index 309d83e..622a85f 100644
> --- a/stdio-common/Makefile
> +++ b/stdio-common/Makefile
> @@ -87,6 +87,7 @@ $(objpfx)tst-grouping.out: $(gen-locales)
> $(objpfx)tst-sprintf.out: $(gen-locales)
> $(objpfx)tst-sscanf.out: $(gen-locales)
> $(objpfx)tst-swprintf.out: $(gen-locales)
> +$(objpfx)tst-vfprintf-mbs-prec.out: $(gen-locales)
> endif
>
> tst-printf-bz18872-ENV = MALLOC_TRACE=$(objpfx)tst-printf-bz18872.mtrace
> diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c
> index b15c5d0..d58f0ea 100644
> --- a/stdio-common/vfprintf.c
> +++ b/stdio-common/vfprintf.c
> @@ -67,22 +67,37 @@
> } while (0)
> #define UNBUFFERED_P(S) ((S)->_IO_file_flags & _IO_UNBUFFERED)
>
> -#define done_add(val) \
> - do { \
> - unsigned int _val = val; \
> - assert ((unsigned int) done < (unsigned int) INT_MAX); \
> - if (__glibc_unlikely (INT_MAX - done < _val)) \
> - { \
> - done = -1; \
> - __set_errno (EOVERFLOW); \
> - goto all_done; \
> - } \
> - done += _val; \
> - } while (0)
> +/* Add LENGTH to DONE. Return the new value of DONE, or -1 on
> + overflow (and set errno accordingly). */
> +static inline int
> +done_add_func (int length, int done)
> +{
> + if (done < 0)
> + return done;
> + if (__glibc_unlikely (INT_MAX - done < length))
> + {
> + __set_errno (EOVERFLOW);
> + return -1;
> + }
> + return done + length;
> +}
Wouldn't be better to add a wrapper on malloc/malloc-internal for overflow
check? Something like
static inline bool
check_add_overflow_int_t (int left, int right, int *result)
{
#if __GNUC__ >= 5
return __builtin_add_overflow (left, right, result);
#else
if (INT_MAX - left <= right)
return true;
*result = left + right;
return false;
#endif
}
As we already do for multiplication and for unsigned (from my char_array)?
> +
> +#define done_add(val) \
> + do \
> + { \
> + /* Ensure that VAL has a type similar to int. */ \
> + _Static_assert (sizeof (val) == sizeof (int), "value int size"); \
> + _Static_assert ((__typeof__ (val)) -1 < 0, "value signed"); \
> + done = done_add_func ((val), done); \
> + if (done < 0) \
> + goto all_done; \
> + } \
> + while (0)
>
> #ifndef COMPILE_WPRINTF
> # define vfprintf _IO_vfprintf_internal
> # define CHAR_T char
> +# define OTHER_CHAR_T wchar_t
> # define UCHAR_T unsigned char
> # define INT_T int
> typedef const char *THOUSANDS_SEP_T;
> @@ -91,25 +106,14 @@ typedef const char *THOUSANDS_SEP_T;
> # define STR_LEN(Str) strlen (Str)
>
> # define PUT(F, S, N) _IO_sputn ((F), (S), (N))
> -# define PAD(Padchar) \
> - do { \
> - if (width > 0) \
> - { \
> - _IO_ssize_t written = _IO_padn (s, (Padchar), width); \
> - if (__glibc_unlikely (written != width)) \
> - { \
> - done = -1; \
> - goto all_done; \
> - } \
> - done_add (written); \
> - } \
> - } while (0)
> # define PUTC(C, F) _IO_putc_unlocked (C, F)
> # define ORIENT if (_IO_vtable_offset (s) == 0 && _IO_fwide (s, -1) != -1)\
> return -1
> +# define CONVERT_FROM_OTHER_STRING __wcsrtombs
> #else
> # define vfprintf _IO_vfwprintf
> # define CHAR_T wchar_t
> +# define OTHER_CHAR_T char
> /* This is a hack!!! There should be a type uwchar_t. */
> # define UCHAR_T unsigned int /* uwchar_t */
> # define INT_T wint_t
> @@ -121,21 +125,9 @@ typedef wchar_t THOUSANDS_SEP_T;
> # include <_itowa.h>
>
> # define PUT(F, S, N) _IO_sputn ((F), (S), (N))
> -# define PAD(Padchar) \
> - do { \
> - if (width > 0) \
> - { \
> - _IO_ssize_t written = _IO_wpadn (s, (Padchar), width); \
> - if (__glibc_unlikely (written != width)) \
> - { \
> - done = -1; \
> - goto all_done; \
> - } \
> - done_add (written); \
> - } \
> - } while (0)
> # define PUTC(C, F) _IO_putwc_unlocked (C, F)
> # define ORIENT if (_IO_fwide (s, 1) != 1) return -1
> +# define CONVERT_FROM_OTHER_STRING __mbsrtowcs
>
> # undef _itoa
> # define _itoa(Val, Buf, Base, Case) _itowa (Val, Buf, Base, Case)
> @@ -144,6 +136,33 @@ typedef wchar_t THOUSANDS_SEP_T;
> # define EOF WEOF
> #endif
>
> +static inline int
> +pad_func (FILE *s, CHAR_T padchar, int width, int done)
> +{
> + if (width > 0)
> + {
> + _IO_ssize_t written;
> +#ifndef COMPILE_WPRINTF
> + written = _IO_padn (s, padchar, width);
> +#else
> + written = _IO_wpadn (s, padchar, width);
> +#endif
> + if (__glibc_unlikely (written != width))
> + return -1;
> + return done_add_func (width, done);
> + }
> + return done;
> +}
> +
> +#define PAD(Padchar) \
> + do \
> + { \
> + done = pad_func (s, (Padchar), width, done); \
> + if (done < 0) \
> + goto all_done; \
> + } \
> + while (0)
> +
> #include "_i18n_number.h"
>
> /* Include the shared code for parsing the format string. */
> @@ -163,25 +182,124 @@ typedef wchar_t THOUSANDS_SEP_T;
> } \
> while (0)
>
> -#define outstring(String, Len) \
> - do \
> - { \
> - assert ((size_t) done <= (size_t) INT_MAX); \
> - if ((size_t) PUT (s, (String), (Len)) != (size_t) (Len)) \
> - { \
> - done = -1; \
> - goto all_done; \
> - } \
> - if (__glibc_unlikely (INT_MAX - done < (Len))) \
> - { \
> - done = -1; \
> - __set_errno (EOVERFLOW); \
> - goto all_done; \
> - } \
> - done += (Len); \
> - } \
> +static inline int
> +outstring_func (FILE *s, const UCHAR_T *string, size_t length, int done)
> +{
> + assert ((size_t) done <= (size_t) INT_MAX);
> + if ((size_t) PUT (s, string, length) != (size_t) (length))
> + return -1;
> + return done_add_func (length, done);
> +}
> +
> +#define outstring(String, Len) \
> + do \
> + { \
> + const void *string_ = (String); \
> + done = outstring_func (s, string_, (Len), done); \
> + if (done < 0) \
> + goto all_done; \
> + } \
> while (0)
>
> +/* Write the string SRC to S. If PREC is non-negative, write at most
> + PREC bytes. If LEFT is true, perform left justification. */
> +static int
> +outstring_converted_wide_string (FILE *s, const OTHER_CHAR_T *src, int prec,
> + int width, bool left, int done)
> +{
> + mbstate_t mbstate;
> +
> + /* Use a small buffer to combine processing of multiple characters.
> + CONVERT_FROM_OTHER_STRING expects the buffer size in (wide)
> + characters, and buf_length counts that. */
> +#ifdef COMPILE_WPRINTF
> + enum { buf_length = 64 };
> + wchar_t buf[buf_length];
> +#else
> + enum { buf_length = 256 };
> + char buf[buf_length];
> + _Static_assert (sizeof (buf) > MB_LEN_MAX,
> + "buffer is large enough for a single multi-byte character");
> +#endif
Why different 'buf_length' sizes if you using the required type for array
creation?
> +
> + /* Add the initial padding if needed. */
> + if (width > 0 && !left)
> + {
> + /* Make a first pass to find the output width, so that we can
> + add the required padding. */
> + memset (&mbstate, 0, sizeof (mbstate_t));
> + const OTHER_CHAR_T *src_copy = src;
> + size_t total_written;
> + if (prec < 0)
> + total_written = CONVERT_FROM_OTHER_STRING
> + (NULL, &src_copy, 0, &mbstate);
> + else
> + {
> + /* The source might not be null-terminated. Enforce the
> + limit manually, based on the output length. */
> + total_written = 0;
> + size_t limit = prec;
> + while (limit > 0 && src_copy != NULL)
I think this comparison using limit as size_t will always be valid,
shouldn't 'limit' be a 'int'?
> + {
> + size_t write_limit = buf_length;
> + if (write_limit > limit)
> + write_limit = limit;
> + size_t written = CONVERT_FROM_OTHER_STRING
> + (buf, &src_copy, write_limit, &mbstate);
> + if (written == (size_t) -1)
> + return -1;
> + if (written == 0)
> + break;
> + total_written += written;
> + limit -= written;
> + }
> + }
> +
> + /* Output initial padding. */
> + if (total_written < width)
> + {
> + done = pad_func (s, L_(' '), width - total_written, done);
> + if (done < 0)
> + return done;
> + }
> + }
> +
> + /* Convert the input string, piece by piece. */
> + memset (&mbstate, 0, sizeof (mbstate_t));
> + size_t total_written = 0;
> + {
> + /* If prec is negative, remaining is not decremented, otherwise,
> + it serves as the write limit. */
> + size_t remaining = -1;
> + if (prec >= 0)
> + remaining = prec;
> + while (remaining > 0 && src != NULL)
As before, shouldn't 'remaining' be a signed type as well?
> + {
> + size_t write_limit = buf_length;
> + if (remaining < write_limit)
> + write_limit = remaining;
> + size_t written = CONVERT_FROM_OTHER_STRING
> + (buf, &src, write_limit, &mbstate);
> + if (written == (size_t) -1)
> + return -1;
> + if (written == 0)
> + break;
> + done = outstring_func (s, (const UCHAR_T *) buf, written, done);
> + if (done < 0)
> + return done;
> + total_written += written;
> + if (prec >= 0)
> + remaining -= written;
> + }
> + }
> +
> + /* Add final padding. */
> + if (width > 0 && left && total_written < width)
> + return pad_func (s, L_(' '), width - total_written, done);
> + else
> + return done;
> +}
> +
> /* For handling long_double and longlong we use the same flag. If
> `long' and `long long' are effectively the same type define it to
> zero. */
> @@ -978,7 +1096,6 @@ static const uint8_t jump_table[] =
> LABEL (form_string): \
> { \
> size_t len; \
> - int string_malloced; \
> \
> /* The string argument could in fact be `char *' or `wchar_t *'. \
> But this should not make a difference here. */ \
> @@ -990,7 +1107,6 @@ static const uint8_t jump_table[] =
> /* Entry point for printing other strings. */ \
> LABEL (print_string): \
> \
> - string_malloced = 0; \
> if (string == NULL) \
> { \
> /* Write "(null)" if there's space. */ \
> @@ -1008,41 +1124,12 @@ static const uint8_t jump_table[] =
> } \
> else if (!is_long && spec != L_('S')) \
> { \
> - /* This is complicated. We have to transform the multibyte \
> - string into a wide character string. */ \
> - const char *mbs = (const char *) string; \
> - mbstate_t mbstate; \
> - \
> - len = prec != -1 ? __strnlen (mbs, (size_t) prec) : strlen (mbs); \
> - \
> - /* Allocate dynamically an array which definitely is long \
> - enough for the wide character version. Each byte in the \
> - multi-byte string can produce at most one wide character. */ \
> - if (__glibc_unlikely (len > SIZE_MAX / sizeof (wchar_t))) \
> - { \
> - __set_errno (EOVERFLOW); \
> - done = -1; \
> - goto all_done; \
> - } \
> - else if (__libc_use_alloca (len * sizeof (wchar_t))) \
> - string = (CHAR_T *) alloca (len * sizeof (wchar_t)); \
> - else if ((string = (CHAR_T *) malloc (len * sizeof (wchar_t))) \
> - == NULL) \
> - { \
> - done = -1; \
> - goto all_done; \
> - } \
> - else \
> - string_malloced = 1; \
> - \
> - memset (&mbstate, '\0', sizeof (mbstate_t)); \
> - len = __mbsrtowcs (string, &mbs, len, &mbstate); \
> - if (len == (size_t) -1) \
> - { \
> - /* Illegal multibyte character. */ \
> - done = -1; \
> - goto all_done; \
> - } \
> + done = outstring_converted_wide_string \
> + (s, (const char *) string, prec, width, left, done); \
> + if (done < 0) \
> + goto all_done; \
> + /* The padding has already been written. */ \
> + break; \
> } \
> else \
> { \
> @@ -1065,8 +1152,6 @@ static const uint8_t jump_table[] =
> outstring (string, len); \
> if (left) \
> PAD (L' '); \
> - if (__glibc_unlikely (string_malloced)) \
> - free (string); \
> } \
> break;
> #else
> @@ -1115,7 +1200,6 @@ static const uint8_t jump_table[] =
> LABEL (form_string): \
> { \
> size_t len; \
> - int string_malloced; \
> \
> /* The string argument could in fact be `char *' or `wchar_t *'. \
> But this should not make a difference here. */ \
> @@ -1127,7 +1211,6 @@ static const uint8_t jump_table[] =
> /* Entry point for printing other strings. */ \
> LABEL (print_string): \
> \
> - string_malloced = 0; \
> if (string == NULL) \
> { \
> /* Write "(null)" if there's space. */ \
> @@ -1153,51 +1236,12 @@ static const uint8_t jump_table[] =
> } \
> else \
> { \
> - const wchar_t *s2 = (const wchar_t *) string; \
> - mbstate_t mbstate; \
> - \
> - memset (&mbstate, '\0', sizeof (mbstate_t)); \
> - \
> - if (prec >= 0) \
> - { \
> - /* The string `s2' might not be NUL terminated. */ \
> - if (__libc_use_alloca (prec)) \
> - string = (char *) alloca (prec); \
> - else if ((string = (char *) malloc (prec)) == NULL) \
> - { \
> - done = -1; \
> - goto all_done; \
> - } \
> - else \
> - string_malloced = 1; \
> - len = __wcsrtombs (string, &s2, prec, &mbstate); \
> - } \
> - else \
> - { \
> - len = __wcsrtombs (NULL, &s2, 0, &mbstate); \
> - if (len != (size_t) -1) \
> - { \
> - assert (__mbsinit (&mbstate)); \
> - s2 = (const wchar_t *) string; \
> - if (__libc_use_alloca (len + 1)) \
> - string = (char *) alloca (len + 1); \
> - else if ((string = (char *) malloc (len + 1)) == NULL) \
> - { \
> - done = -1; \
> - goto all_done; \
> - } \
> - else \
> - string_malloced = 1; \
> - (void) __wcsrtombs (string, &s2, len + 1, &mbstate); \
> - } \
> - } \
> - \
> - if (len == (size_t) -1) \
> - { \
> - /* Illegal wide-character string. */ \
> - done = -1; \
> - goto all_done; \
> - } \
> + done = outstring_converted_wide_string \
> + (s, (const wchar_t *) string, prec, width, left, done); \
> + if (done < 0) \
> + goto all_done; \
> + /* The padding has already been written. */ \
> + break; \
> } \
> \
> if ((width -= len) < 0) \
> @@ -1211,8 +1255,6 @@ static const uint8_t jump_table[] =
> outstring (string, len); \
> if (left) \
> PAD (' '); \
> - if (__glibc_unlikely (string_malloced)) \
> - free (string); \
> } \
> break;
> #endif
>