This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] vfprintf: Remove alloca from string formatting with conversion
- From: fweimer at redhat dot com (Florian Weimer)
- To: libc-alpha at sourceware dot org
- Date: Mon, 19 Jun 2017 18:20:34 +0200
- Subject: [PATCH] vfprintf: Remove alloca from string formatting with conversion
- Authentication-results: sourceware.org; auth=none
- Authentication-results: ext-mx04.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com
- Authentication-results: ext-mx04.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=fweimer at redhat dot com
- Dkim-filter: OpenDKIM Filter v2.11.0 mx1.redhat.com B7C9380B52
- Dmarc-filter: OpenDMARC Filter v1.3.2 mx1.redhat.com B7C9380B52
2017-06-19 Florian Weimer <fweimer@redhat.com>
* stdio-common/vfprintf.c (done_add_func): New function.
(done_add): Use it.
(OTHER_CHAR_T, CONVERT_FROM_OTHER_STRING): New macros.
(pad_func): New function.
(PAD): Use it.
(outstring_func): New function.
(outstring): Use it.
(outstring_converted_wide_string): New function.
(process_arg): Use it.
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 309d83e..622a85f 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -87,6 +87,7 @@ $(objpfx)tst-grouping.out: $(gen-locales)
$(objpfx)tst-sprintf.out: $(gen-locales)
$(objpfx)tst-sscanf.out: $(gen-locales)
$(objpfx)tst-swprintf.out: $(gen-locales)
+$(objpfx)tst-vfprintf-mbs-prec.out: $(gen-locales)
endif
tst-printf-bz18872-ENV = MALLOC_TRACE=$(objpfx)tst-printf-bz18872.mtrace
diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c
index b15c5d0..d58f0ea 100644
--- a/stdio-common/vfprintf.c
+++ b/stdio-common/vfprintf.c
@@ -67,22 +67,37 @@
} while (0)
#define UNBUFFERED_P(S) ((S)->_IO_file_flags & _IO_UNBUFFERED)
-#define done_add(val) \
- do { \
- unsigned int _val = val; \
- assert ((unsigned int) done < (unsigned int) INT_MAX); \
- if (__glibc_unlikely (INT_MAX - done < _val)) \
- { \
- done = -1; \
- __set_errno (EOVERFLOW); \
- goto all_done; \
- } \
- done += _val; \
- } while (0)
+/* Add LENGTH to DONE. Return the new value of DONE, or -1 on
+ overflow (and set errno accordingly). */
+static inline int
+done_add_func (int length, int done)
+{
+ if (done < 0)
+ return done;
+ if (__glibc_unlikely (INT_MAX - done < length))
+ {
+ __set_errno (EOVERFLOW);
+ return -1;
+ }
+ return done + length;
+}
+
+#define done_add(val) \
+ do \
+ { \
+ /* Ensure that VAL has a type similar to int. */ \
+ _Static_assert (sizeof (val) == sizeof (int), "value int size"); \
+ _Static_assert ((__typeof__ (val)) -1 < 0, "value signed"); \
+ done = done_add_func ((val), done); \
+ if (done < 0) \
+ goto all_done; \
+ } \
+ while (0)
#ifndef COMPILE_WPRINTF
# define vfprintf _IO_vfprintf_internal
# define CHAR_T char
+# define OTHER_CHAR_T wchar_t
# define UCHAR_T unsigned char
# define INT_T int
typedef const char *THOUSANDS_SEP_T;
@@ -91,25 +106,14 @@ typedef const char *THOUSANDS_SEP_T;
# define STR_LEN(Str) strlen (Str)
# define PUT(F, S, N) _IO_sputn ((F), (S), (N))
-# define PAD(Padchar) \
- do { \
- if (width > 0) \
- { \
- _IO_ssize_t written = _IO_padn (s, (Padchar), width); \
- if (__glibc_unlikely (written != width)) \
- { \
- done = -1; \
- goto all_done; \
- } \
- done_add (written); \
- } \
- } while (0)
# define PUTC(C, F) _IO_putc_unlocked (C, F)
# define ORIENT if (_IO_vtable_offset (s) == 0 && _IO_fwide (s, -1) != -1)\
return -1
+# define CONVERT_FROM_OTHER_STRING __wcsrtombs
#else
# define vfprintf _IO_vfwprintf
# define CHAR_T wchar_t
+# define OTHER_CHAR_T char
/* This is a hack!!! There should be a type uwchar_t. */
# define UCHAR_T unsigned int /* uwchar_t */
# define INT_T wint_t
@@ -121,21 +125,9 @@ typedef wchar_t THOUSANDS_SEP_T;
# include <_itowa.h>
# define PUT(F, S, N) _IO_sputn ((F), (S), (N))
-# define PAD(Padchar) \
- do { \
- if (width > 0) \
- { \
- _IO_ssize_t written = _IO_wpadn (s, (Padchar), width); \
- if (__glibc_unlikely (written != width)) \
- { \
- done = -1; \
- goto all_done; \
- } \
- done_add (written); \
- } \
- } while (0)
# define PUTC(C, F) _IO_putwc_unlocked (C, F)
# define ORIENT if (_IO_fwide (s, 1) != 1) return -1
+# define CONVERT_FROM_OTHER_STRING __mbsrtowcs
# undef _itoa
# define _itoa(Val, Buf, Base, Case) _itowa (Val, Buf, Base, Case)
@@ -144,6 +136,33 @@ typedef wchar_t THOUSANDS_SEP_T;
# define EOF WEOF
#endif
+static inline int
+pad_func (FILE *s, CHAR_T padchar, int width, int done)
+{
+ if (width > 0)
+ {
+ _IO_ssize_t written;
+#ifndef COMPILE_WPRINTF
+ written = _IO_padn (s, padchar, width);
+#else
+ written = _IO_wpadn (s, padchar, width);
+#endif
+ if (__glibc_unlikely (written != width))
+ return -1;
+ return done_add_func (width, done);
+ }
+ return done;
+}
+
+#define PAD(Padchar) \
+ do \
+ { \
+ done = pad_func (s, (Padchar), width, done); \
+ if (done < 0) \
+ goto all_done; \
+ } \
+ while (0)
+
#include "_i18n_number.h"
/* Include the shared code for parsing the format string. */
@@ -163,25 +182,124 @@ typedef wchar_t THOUSANDS_SEP_T;
} \
while (0)
-#define outstring(String, Len) \
- do \
- { \
- assert ((size_t) done <= (size_t) INT_MAX); \
- if ((size_t) PUT (s, (String), (Len)) != (size_t) (Len)) \
- { \
- done = -1; \
- goto all_done; \
- } \
- if (__glibc_unlikely (INT_MAX - done < (Len))) \
- { \
- done = -1; \
- __set_errno (EOVERFLOW); \
- goto all_done; \
- } \
- done += (Len); \
- } \
+static inline int
+outstring_func (FILE *s, const UCHAR_T *string, size_t length, int done)
+{
+ assert ((size_t) done <= (size_t) INT_MAX);
+ if ((size_t) PUT (s, string, length) != (size_t) (length))
+ return -1;
+ return done_add_func (length, done);
+}
+
+#define outstring(String, Len) \
+ do \
+ { \
+ const void *string_ = (String); \
+ done = outstring_func (s, string_, (Len), done); \
+ if (done < 0) \
+ goto all_done; \
+ } \
while (0)
+/* Write the string SRC to S. If PREC is non-negative, write at most
+ PREC bytes. If LEFT is true, perform left justification. */
+static int
+outstring_converted_wide_string (FILE *s, const OTHER_CHAR_T *src, int prec,
+ int width, bool left, int done)
+{
+ mbstate_t mbstate;
+
+ /* Use a small buffer to combine processing of multiple characters.
+ CONVERT_FROM_OTHER_STRING expects the buffer size in (wide)
+ characters, and buf_length counts that. */
+#ifdef COMPILE_WPRINTF
+ enum { buf_length = 64 };
+ wchar_t buf[buf_length];
+#else
+ enum { buf_length = 256 };
+ char buf[buf_length];
+ _Static_assert (sizeof (buf) > MB_LEN_MAX,
+ "buffer is large enough for a single multi-byte character");
+#endif
+
+ /* Add the initial padding if needed. */
+ if (width > 0 && !left)
+ {
+ /* Make a first pass to find the output width, so that we can
+ add the required padding. */
+ memset (&mbstate, 0, sizeof (mbstate_t));
+ const OTHER_CHAR_T *src_copy = src;
+ size_t total_written;
+ if (prec < 0)
+ total_written = CONVERT_FROM_OTHER_STRING
+ (NULL, &src_copy, 0, &mbstate);
+ else
+ {
+ /* The source might not be null-terminated. Enforce the
+ limit manually, based on the output length. */
+ total_written = 0;
+ size_t limit = prec;
+ while (limit > 0 && src_copy != NULL)
+ {
+ size_t write_limit = buf_length;
+ if (write_limit > limit)
+ write_limit = limit;
+ size_t written = CONVERT_FROM_OTHER_STRING
+ (buf, &src_copy, write_limit, &mbstate);
+ if (written == (size_t) -1)
+ return -1;
+ if (written == 0)
+ break;
+ total_written += written;
+ limit -= written;
+ }
+ }
+
+ /* Output initial padding. */
+ if (total_written < width)
+ {
+ done = pad_func (s, L_(' '), width - total_written, done);
+ if (done < 0)
+ return done;
+ }
+ }
+
+ /* Convert the input string, piece by piece. */
+ memset (&mbstate, 0, sizeof (mbstate_t));
+ size_t total_written = 0;
+ {
+ /* If prec is negative, remaining is not decremented, otherwise,
+ it serves as the write limit. */
+ size_t remaining = -1;
+ if (prec >= 0)
+ remaining = prec;
+ while (remaining > 0 && src != NULL)
+ {
+ size_t write_limit = buf_length;
+ if (remaining < write_limit)
+ write_limit = remaining;
+ size_t written = CONVERT_FROM_OTHER_STRING
+ (buf, &src, write_limit, &mbstate);
+ if (written == (size_t) -1)
+ return -1;
+ if (written == 0)
+ break;
+ done = outstring_func (s, (const UCHAR_T *) buf, written, done);
+ if (done < 0)
+ return done;
+ total_written += written;
+ if (prec >= 0)
+ remaining -= written;
+ }
+ }
+
+ /* Add final padding. */
+ if (width > 0 && left && total_written < width)
+ return pad_func (s, L_(' '), width - total_written, done);
+ else
+ return done;
+}
+
/* For handling long_double and longlong we use the same flag. If
`long' and `long long' are effectively the same type define it to
zero. */
@@ -978,7 +1096,6 @@ static const uint8_t jump_table[] =
LABEL (form_string): \
{ \
size_t len; \
- int string_malloced; \
\
/* The string argument could in fact be `char *' or `wchar_t *'. \
But this should not make a difference here. */ \
@@ -990,7 +1107,6 @@ static const uint8_t jump_table[] =
/* Entry point for printing other strings. */ \
LABEL (print_string): \
\
- string_malloced = 0; \
if (string == NULL) \
{ \
/* Write "(null)" if there's space. */ \
@@ -1008,41 +1124,12 @@ static const uint8_t jump_table[] =
} \
else if (!is_long && spec != L_('S')) \
{ \
- /* This is complicated. We have to transform the multibyte \
- string into a wide character string. */ \
- const char *mbs = (const char *) string; \
- mbstate_t mbstate; \
- \
- len = prec != -1 ? __strnlen (mbs, (size_t) prec) : strlen (mbs); \
- \
- /* Allocate dynamically an array which definitely is long \
- enough for the wide character version. Each byte in the \
- multi-byte string can produce at most one wide character. */ \
- if (__glibc_unlikely (len > SIZE_MAX / sizeof (wchar_t))) \
- { \
- __set_errno (EOVERFLOW); \
- done = -1; \
- goto all_done; \
- } \
- else if (__libc_use_alloca (len * sizeof (wchar_t))) \
- string = (CHAR_T *) alloca (len * sizeof (wchar_t)); \
- else if ((string = (CHAR_T *) malloc (len * sizeof (wchar_t))) \
- == NULL) \
- { \
- done = -1; \
- goto all_done; \
- } \
- else \
- string_malloced = 1; \
- \
- memset (&mbstate, '\0', sizeof (mbstate_t)); \
- len = __mbsrtowcs (string, &mbs, len, &mbstate); \
- if (len == (size_t) -1) \
- { \
- /* Illegal multibyte character. */ \
- done = -1; \
- goto all_done; \
- } \
+ done = outstring_converted_wide_string \
+ (s, (const char *) string, prec, width, left, done); \
+ if (done < 0) \
+ goto all_done; \
+ /* The padding has already been written. */ \
+ break; \
} \
else \
{ \
@@ -1065,8 +1152,6 @@ static const uint8_t jump_table[] =
outstring (string, len); \
if (left) \
PAD (L' '); \
- if (__glibc_unlikely (string_malloced)) \
- free (string); \
} \
break;
#else
@@ -1115,7 +1200,6 @@ static const uint8_t jump_table[] =
LABEL (form_string): \
{ \
size_t len; \
- int string_malloced; \
\
/* The string argument could in fact be `char *' or `wchar_t *'. \
But this should not make a difference here. */ \
@@ -1127,7 +1211,6 @@ static const uint8_t jump_table[] =
/* Entry point for printing other strings. */ \
LABEL (print_string): \
\
- string_malloced = 0; \
if (string == NULL) \
{ \
/* Write "(null)" if there's space. */ \
@@ -1153,51 +1236,12 @@ static const uint8_t jump_table[] =
} \
else \
{ \
- const wchar_t *s2 = (const wchar_t *) string; \
- mbstate_t mbstate; \
- \
- memset (&mbstate, '\0', sizeof (mbstate_t)); \
- \
- if (prec >= 0) \
- { \
- /* The string `s2' might not be NUL terminated. */ \
- if (__libc_use_alloca (prec)) \
- string = (char *) alloca (prec); \
- else if ((string = (char *) malloc (prec)) == NULL) \
- { \
- done = -1; \
- goto all_done; \
- } \
- else \
- string_malloced = 1; \
- len = __wcsrtombs (string, &s2, prec, &mbstate); \
- } \
- else \
- { \
- len = __wcsrtombs (NULL, &s2, 0, &mbstate); \
- if (len != (size_t) -1) \
- { \
- assert (__mbsinit (&mbstate)); \
- s2 = (const wchar_t *) string; \
- if (__libc_use_alloca (len + 1)) \
- string = (char *) alloca (len + 1); \
- else if ((string = (char *) malloc (len + 1)) == NULL) \
- { \
- done = -1; \
- goto all_done; \
- } \
- else \
- string_malloced = 1; \
- (void) __wcsrtombs (string, &s2, len + 1, &mbstate); \
- } \
- } \
- \
- if (len == (size_t) -1) \
- { \
- /* Illegal wide-character string. */ \
- done = -1; \
- goto all_done; \
- } \
+ done = outstring_converted_wide_string \
+ (s, (const wchar_t *) string, prec, width, left, done); \
+ if (done < 0) \
+ goto all_done; \
+ /* The padding has already been written. */ \
+ break; \
} \
\
if ((width -= len) < 0) \
@@ -1211,8 +1255,6 @@ static const uint8_t jump_table[] =
outstring (string, len); \
if (left) \
PAD (' '); \
- if (__glibc_unlikely (string_malloced)) \
- free (string); \
} \
break;
#endif