This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] Add Roman numerals in *printf
- From: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
- To: libc-alpha at sourceware dot org
- Date: Fri, 1 Apr 2016 09:52:27 -0300
- Subject: Re: [PATCH] Add Roman numerals in *printf
- Authentication-results: sourceware.org; auth=none
- References: <1459503215-21039-1-git-send-email-matteo at openwrt dot org>
AFAIK this format is not defined in any standard or specification GLIBC
follows, nor do I think it is worth a GNU extension. There are some
problems in adding an intrinsic modifier that is not supported on
other platforms: compatibility, the modifier could be defined in a future
POSIX standard, how to enable it (with _GNU_SOURCE or something), etc.
You can implement it as a printf customization [1], which is far from
perfect but with some care gets the job done.
[1] http://www.gnu.org/software/libc/manual/html_node/Customizing-Printf.html
On 01-04-2016 06:33, Matteo Croce wrote:
> Since Fibonacci adoption in the 13th century, Arabic numbers have become
> the standard numeric system used in every culture.
> However, older numeral systems are still used in many contexts,
> for example, ancient Roman numerals are often used to represent
> software version, protocol revision or even movie chapters.
>
> This patch adds the `%r' modifier in all the *printf functions,
> which can be used to represent a number in Roman numerals.
> This has two big advantages:
>
> first of all there is no need to hardcode text strings in
> the code which needs to be updated on every version change.
> e.g. printf("System V booting") or printf("Text Editor for OS X")
> can be replaced with:
> printf("System %R booting", ver) and printf("Text Editor for OS %R", ver)
> and the right version string is generated at runtime.
>
> The second advantage is that Roman numerals are very lengthy and even a small
> 16 bit number can occupy up to 15 bytes,
> thus leading to a 650% increase in code size.
>
> Due to a technical limitation the maximum number that can be represented in Roman
> numerals is limited to 3999, but archaeologists agree that ancient Romans used
> a superscript to multiply by 1000 and a vertical line to multiply by 1 million.
>
> To avoid using Unicode characters in such basic IO routines, a pair of _ are
> used to represent a thousand value, e.g. _XX_ for 20 000
> and a | to represent millions, e.g. |L| for 50 000 000. This increases the
> maximum integer that can be represented to 499 999 999, which should be
> a reasonable value for our concern; bigger numbers will print as `(infinitum)'.
>
> Another issue is the zero value which was missing in the Roman system,
> indeed the zero sign was "imported" by Fibonacci from North Africa,
> so when trying to print 0 as Roman numeral, the string `(nihil)',
> which is the Latin word for `nothing', will be printed, like `(null)' for pointers.
>
> Roman numerals can be generated both in upper and lower case
> respectively, with the `%R' and `%r' modifier.
> ---
> stdio-common/vfprintf.c | 109 +++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 93 insertions(+), 16 deletions(-)
>
> diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c
> index 6829d4d..e85c29c 100644
> --- a/stdio-common/vfprintf.c
> +++ b/stdio-common/vfprintf.c
> @@ -215,20 +215,20 @@ static const uint8_t jump_table[] =
> /* '4' */ 8, /* '5' */ 8, /* '6' */ 8, /* '7' */ 8,
> /* '8' */ 8, /* '9' */ 8, 0, 0,
> 0, 0, 0, 0,
> - 0, /* 'A' */ 26, 0, /* 'C' */ 25,
> - 0, /* 'E' */ 19, /* F */ 19, /* 'G' */ 19,
> - 0, /* 'I' */ 29, 0, 0,
> + 0, /* 'A' */ 27, 0, /* 'C' */ 26,
> + 0, /* 'E' */ 20, /* F */ 20, /* 'G' */ 20,
> + 0, /* 'I' */ 30, 0, 0,
> /* 'L' */ 12, 0, 0, 0,
> - 0, 0, 0, /* 'S' */ 21,
> + 0, 0, /* 'R' */ 19, /* 'S' */ 22,
> 0, 0, 0, 0,
> /* 'X' */ 18, 0, /* 'Z' */ 13, 0,
> 0, 0, 0, 0,
> - 0, /* 'a' */ 26, 0, /* 'c' */ 20,
> - /* 'd' */ 15, /* 'e' */ 19, /* 'f' */ 19, /* 'g' */ 19,
> - /* 'h' */ 10, /* 'i' */ 15, /* 'j' */ 28, 0,
> - /* 'l' */ 11, /* 'm' */ 24, /* 'n' */ 23, /* 'o' */ 17,
> - /* 'p' */ 22, /* 'q' */ 12, 0, /* 's' */ 21,
> - /* 't' */ 27, /* 'u' */ 16, 0, 0,
> + 0, /* 'a' */ 27, 0, /* 'c' */ 21,
> + /* 'd' */ 15, /* 'e' */ 20, /* 'f' */ 20, /* 'g' */ 20,
> + /* 'h' */ 10, /* 'i' */ 15, /* 'j' */ 29, 0,
> + /* 'l' */ 11, /* 'm' */ 25, /* 'n' */ 24, /* 'o' */ 17,
> + /* 'p' */ 23, /* 'q' */ 12, /* 'r' */ 19, /* 's' */ 22,
> + /* 't' */ 28, /* 'u' */ 16, 0, 0,
> /* 'x' */ 18, 0, /* 'z' */ 13
> };
>
> @@ -269,7 +269,7 @@ static const uint8_t jump_table[] =
>
> #define STEP0_3_TABLE \
> /* Step 0: at the beginning. */ \
> - static JUMP_TABLE_TYPE step0_jumps[30] = \
> + static JUMP_TABLE_TYPE step0_jumps[31] = \
> { \
> REF (form_unknown), \
> REF (flag_space), /* for ' ' */ \
> @@ -290,6 +290,7 @@ static const uint8_t jump_table[] =
> REF (form_unsigned), /* for 'u' */ \
> REF (form_octal), /* for 'o' */ \
> REF (form_hexa), /* for 'X', 'x' */ \
> + REF (form_roman), /* for 'R', 'r' */ \
> REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \
> REF (form_character), /* for 'c' */ \
> REF (form_string), /* for 's', 'S' */ \
> @@ -303,7 +304,7 @@ static const uint8_t jump_table[] =
> REF (flag_i18n), /* for 'I' */ \
> }; \
> /* Step 1: after processing width. */ \
> - static JUMP_TABLE_TYPE step1_jumps[30] = \
> + static JUMP_TABLE_TYPE step1_jumps[31] = \
> { \
> REF (form_unknown), \
> REF (form_unknown), /* for ' ' */ \
> @@ -324,6 +325,7 @@ static const uint8_t jump_table[] =
> REF (form_unsigned), /* for 'u' */ \
> REF (form_octal), /* for 'o' */ \
> REF (form_hexa), /* for 'X', 'x' */ \
> + REF (form_roman), /* for 'R', 'r' */ \
> REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \
> REF (form_character), /* for 'c' */ \
> REF (form_string), /* for 's', 'S' */ \
> @@ -337,7 +339,7 @@ static const uint8_t jump_table[] =
> REF (form_unknown) /* for 'I' */ \
> }; \
> /* Step 2: after processing precision. */ \
> - static JUMP_TABLE_TYPE step2_jumps[30] = \
> + static JUMP_TABLE_TYPE step2_jumps[31] = \
> { \
> REF (form_unknown), \
> REF (form_unknown), /* for ' ' */ \
> @@ -358,6 +360,7 @@ static const uint8_t jump_table[] =
> REF (form_unsigned), /* for 'u' */ \
> REF (form_octal), /* for 'o' */ \
> REF (form_hexa), /* for 'X', 'x' */ \
> + REF (form_roman), /* for 'R', 'r' */ \
> REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \
> REF (form_character), /* for 'c' */ \
> REF (form_string), /* for 's', 'S' */ \
> @@ -371,7 +374,7 @@ static const uint8_t jump_table[] =
> REF (form_unknown) /* for 'I' */ \
> }; \
> /* Step 3a: after processing first 'h' modifier. */ \
> - static JUMP_TABLE_TYPE step3a_jumps[30] = \
> + static JUMP_TABLE_TYPE step3a_jumps[31] = \
> { \
> REF (form_unknown), \
> REF (form_unknown), /* for ' ' */ \
> @@ -392,6 +395,7 @@ static const uint8_t jump_table[] =
> REF (form_unsigned), /* for 'u' */ \
> REF (form_octal), /* for 'o' */ \
> REF (form_hexa), /* for 'X', 'x' */ \
> + REF (form_roman), /* for 'R', 'r' */ \
> REF (form_unknown), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \
> REF (form_unknown), /* for 'c' */ \
> REF (form_unknown), /* for 's', 'S' */ \
> @@ -405,7 +409,7 @@ static const uint8_t jump_table[] =
> REF (form_unknown) /* for 'I' */ \
> }; \
> /* Step 3b: after processing first 'l' modifier. */ \
> - static JUMP_TABLE_TYPE step3b_jumps[30] = \
> + static JUMP_TABLE_TYPE step3b_jumps[31] = \
> { \
> REF (form_unknown), \
> REF (form_unknown), /* for ' ' */ \
> @@ -426,6 +430,7 @@ static const uint8_t jump_table[] =
> REF (form_unsigned), /* for 'u' */ \
> REF (form_octal), /* for 'o' */ \
> REF (form_hexa), /* for 'X', 'x' */ \
> + REF (form_roman), /* for 'R', 'r' */ \
> REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \
> REF (form_character), /* for 'c' */ \
> REF (form_string), /* for 's', 'S' */ \
> @@ -441,7 +446,7 @@ static const uint8_t jump_table[] =
>
> #define STEP4_TABLE \
> /* Step 4: processing format specifier. */ \
> - static JUMP_TABLE_TYPE step4_jumps[30] = \
> + static JUMP_TABLE_TYPE step4_jumps[31] = \
> { \
> REF (form_unknown), \
> REF (form_unknown), /* for ' ' */ \
> @@ -462,6 +467,7 @@ static const uint8_t jump_table[] =
> REF (form_unsigned), /* for 'u' */ \
> REF (form_octal), /* for 'o' */ \
> REF (form_hexa), /* for 'X', 'x' */ \
> + REF (form_roman), /* for 'R', 'r' */ \
> REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \
> REF (form_character), /* for 'c' */ \
> REF (form_string), /* for 's', 'S' */ \
> @@ -741,6 +747,11 @@ static const uint8_t jump_table[] =
> break; \
> } \
> \
> + LABEL (form_roman): \
> + /* Ancient Roman / Latin number. */ \
> + roman(s, va_arg (ap, int), spec == L_('R')); \
> + break; \
> + \
> LABEL (form_float): \
> { \
> /* Floating-point number. This is handled by printf_fp.c. */ \
> @@ -1210,6 +1221,72 @@ static const uint8_t jump_table[] =
> break;
> #endif
>
> +static const int numbers[] = { 1000, 900, 500, 400, 100, 90,
> + 50, 40, 10, 9, 5, 4, 1 };
> +static const CHAR_T *_rul[] = { L_("M"), L_("CM"), L_("D"), L_("CD"), L_("C"),
> + L_("XC"), L_("L"), L_("XL"), L_("X"),
> + L_("IX"), L_("V"), L_("IV"), L_("I") };
> +static const CHAR_T *_rll[] = { L_("m"), L_("cm"), L_("d"), L_("cd"), L_("c"),
> + L_("xc"), L_("l"), L_("xl"), L_("x"),
> + L_("ix"), L_("v"), L_("iv"), L_("i") };
> +
> +static void roman(FILE *s, long int num, int upper_case)
> +{
> + /* used by outchar */
> + int done = 0;
> +
> + const CHAR_T **letters = upper_case ? _rul : _rll;
> +
> + if(!num)
> + {
> + outstring(L_("(nihil)"), 7);
> + return;
> + }
> +
> + if(num < 0)
> + {
> + outchar(L_('-'));
> + num = -num;
> + }
> +
> + if(num > 499999999)
> + {
> + outstring(L_("(infinitum)"), 11);
> + return;
> + }
> +
> + if(num > 99999)
> + {
> + outchar(L_('|'));
> + roman(s, num / 100000, upper_case);
> + outchar(L_('|'));
> + num %= 100000;
> + }
> +
> + if(num > 4999)
> + {
> + outchar(L_('_'));
> + roman(s, num / 1000, upper_case);
> + outchar(L_('_'));
> + num %= 1000;
> + }
> +
> + for (int i = 0; i < sizeof(numbers) / sizeof(*numbers); i++)
> + {
> + while (num >= numbers[i])
> + {
> + outchar(letters[i][0]);
> + if(letters[i][1])
> + outchar(letters[i][1]);
> + num -= numbers[i];
> + }
> + }
> +
> + /* suppress warnings */
> + all_done:
> + return;
> +}
> +
> /* Helper function to provide temporary buffering for unbuffered streams. */
> static int buffered_vfprintf (FILE *stream, const CHAR_T *fmt, va_list)
> __THROW __attribute__ ((noinline)) internal_function;
>