This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [PATCH] Add Roman numerals in *printf

From: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
To: libc-alpha at sourceware dot org
Date: Fri, 1 Apr 2016 09:52:27 -0300
Subject: Re: [PATCH] Add Roman numerals in *printf
Authentication-results: sourceware.org; auth=none
References: <1459503215-21039-1-git-send-email-matteo at openwrt dot org>
AFAIK this format is not defined in any standard or specification GLIBC
follows, nor do I think it is worth a GNU extension.  There are some
problems in adding an intrinsic modifier that is not supported on
other platforms: compatibility, the modifier could be defined in a future
POSIX standard, how to enable it (with _GNU_SOURCE or something), etc.

You can implement it as printf customization [1], which is far from
perfect but with some care gets the job done.

[1] http://www.gnu.org/software/libc/manual/html_node/Customizing-Printf.html


On 01-04-2016 06:33, Matteo Croce wrote:
> Since Fibonacci adoption in the 13th century, Arabic numbers have become
> the standard numeric system used in every culture.
> However, older numeral systems are still used in many contexts;
> for example, ancient Roman numerals are often used to represent
> software versions, protocol revisions or even movie chapters.
> 
> This patch adds the `%r' modifier in all the *printf functions,
> which can be used to represent a number in Roman numerals.
> This has two big advantages:
> 
> first of all, there is no need to hardcode text strings in
> the code which need to be updated on every version change.
> e.g. printf("System V booting") or printf("Text Editor for OS X")
> can be replaced with:
> printf("System %R booting", ver) and printf("Text Editor for OS %R", ver)
> and the right version string is generated at runtime.
> 
> The second advantage is that Roman numerals are very lengthy and even a small
> 16 bit number can occupy up to 15 bytes,
> thus leading to a 650% increase in code size.
> 
> Due to a technical limitation, the maximum number that can be represented in Roman
> numerals is limited to 3999, but archaeologists agree that ancient Romans used
> a superscript to multiply by 1000 and a vertical line to multiply by 1 million.
> 
> To avoid using Unicode characters in such basic IO routines, a pair of _ is
> used to represent a thousand value, e.g. _XX_ for 20 000,
> and a | to represent millions, e.g. |L| for 50 000 000. This increases the
> maximum integer that can be represented to 499 999 999, which should be
> a reasonable value for our concern; bigger numbers will print as `(infinitum)'.
> 
> Another issue is the zero value, which was missing in the Roman system;
> indeed, the zero sign was "imported" by Fibonacci from North Africa,
> so when trying to print 0 as a Roman numeral, the string `(nihil)',
> which is the Latin word for `nothing', will be printed, like `(null)' for pointers.
> 
> Roman numerals can be generated both in upper and lower case
> respectively, with the `%R' and `%r' modifier.
> ---
>  stdio-common/vfprintf.c | 109 +++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 93 insertions(+), 16 deletions(-)
> 
> diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c
> index 6829d4d..e85c29c 100644
> --- a/stdio-common/vfprintf.c
> +++ b/stdio-common/vfprintf.c
> @@ -215,20 +215,20 @@ static const uint8_t jump_table[] =
>      /* '4' */  8, /* '5' */  8, /* '6' */  8, /* '7' */  8,
>      /* '8' */  8, /* '9' */  8,            0,            0,
>  	       0,            0,            0,            0,
> -	       0, /* 'A' */ 26,            0, /* 'C' */ 25,
> -	       0, /* 'E' */ 19, /* F */   19, /* 'G' */ 19,
> -	       0, /* 'I' */ 29,            0,            0,
> +	       0, /* 'A' */ 27,            0, /* 'C' */ 26,
> +	       0, /* 'E' */ 20, /* F */   20, /* 'G' */ 20,
> +	       0, /* 'I' */ 30,            0,            0,
>      /* 'L' */ 12,            0,            0,            0,
> -	       0,            0,            0, /* 'S' */ 21,
> +	       0,            0, /* 'R' */ 19, /* 'S' */ 22,
>  	       0,            0,            0,            0,
>      /* 'X' */ 18,            0, /* 'Z' */ 13,            0,
>  	       0,            0,            0,            0,
> -	       0, /* 'a' */ 26,            0, /* 'c' */ 20,
> -    /* 'd' */ 15, /* 'e' */ 19, /* 'f' */ 19, /* 'g' */ 19,
> -    /* 'h' */ 10, /* 'i' */ 15, /* 'j' */ 28,            0,
> -    /* 'l' */ 11, /* 'm' */ 24, /* 'n' */ 23, /* 'o' */ 17,
> -    /* 'p' */ 22, /* 'q' */ 12,            0, /* 's' */ 21,
> -    /* 't' */ 27, /* 'u' */ 16,            0,            0,
> +	       0, /* 'a' */ 27,            0, /* 'c' */ 21,
> +    /* 'd' */ 15, /* 'e' */ 20, /* 'f' */ 20, /* 'g' */ 20,
> +    /* 'h' */ 10, /* 'i' */ 15, /* 'j' */ 29,            0,
> +    /* 'l' */ 11, /* 'm' */ 25, /* 'n' */ 24, /* 'o' */ 17,
> +    /* 'p' */ 23, /* 'q' */ 12, /* 'r' */ 19, /* 's' */ 22,
> +    /* 't' */ 28, /* 'u' */ 16,            0,            0,
>      /* 'x' */ 18,            0, /* 'z' */ 13
>    };
>  
> @@ -269,7 +269,7 @@ static const uint8_t jump_table[] =
>  
>  #define STEP0_3_TABLE							      \
>      /* Step 0: at the beginning.  */					      \
> -    static JUMP_TABLE_TYPE step0_jumps[30] =				      \
> +    static JUMP_TABLE_TYPE step0_jumps[31] =				      \
>      {									      \
>        REF (form_unknown),						      \
>        REF (flag_space),		/* for ' ' */				      \
> @@ -290,6 +290,7 @@ static const uint8_t jump_table[] =
>        REF (form_unsigned),	/* for 'u' */				      \
>        REF (form_octal),		/* for 'o' */				      \
>        REF (form_hexa),		/* for 'X', 'x' */			      \
> +      REF (form_roman),		/* for 'R', 'r' */			      \
>        REF (form_float),		/* for 'E', 'e', 'F', 'f', 'G', 'g' */	      \
>        REF (form_character),	/* for 'c' */				      \
>        REF (form_string),	/* for 's', 'S' */			      \
> @@ -303,7 +304,7 @@ static const uint8_t jump_table[] =
>        REF (flag_i18n),		/* for 'I' */				      \
>      };									      \
>      /* Step 1: after processing width.  */				      \
> -    static JUMP_TABLE_TYPE step1_jumps[30] =				      \
> +    static JUMP_TABLE_TYPE step1_jumps[31] =				      \
>      {									      \
>        REF (form_unknown),						      \
>        REF (form_unknown),	/* for ' ' */				      \
> @@ -324,6 +325,7 @@ static const uint8_t jump_table[] =
>        REF (form_unsigned),	/* for 'u' */				      \
>        REF (form_octal),		/* for 'o' */				      \
>        REF (form_hexa),		/* for 'X', 'x' */			      \
> +      REF (form_roman),		/* for 'R', 'r' */			      \
>        REF (form_float),		/* for 'E', 'e', 'F', 'f', 'G', 'g' */	      \
>        REF (form_character),	/* for 'c' */				      \
>        REF (form_string),	/* for 's', 'S' */			      \
> @@ -337,7 +339,7 @@ static const uint8_t jump_table[] =
>        REF (form_unknown)        /* for 'I' */				      \
>      };									      \
>      /* Step 2: after processing precision.  */				      \
> -    static JUMP_TABLE_TYPE step2_jumps[30] =				      \
> +    static JUMP_TABLE_TYPE step2_jumps[31] =				      \
>      {									      \
>        REF (form_unknown),						      \
>        REF (form_unknown),	/* for ' ' */				      \
> @@ -358,6 +360,7 @@ static const uint8_t jump_table[] =
>        REF (form_unsigned),	/* for 'u' */				      \
>        REF (form_octal),		/* for 'o' */				      \
>        REF (form_hexa),		/* for 'X', 'x' */			      \
> +      REF (form_roman),		/* for 'R', 'r' */			      \
>        REF (form_float),		/* for 'E', 'e', 'F', 'f', 'G', 'g' */	      \
>        REF (form_character),	/* for 'c' */				      \
>        REF (form_string),	/* for 's', 'S' */			      \
> @@ -371,7 +374,7 @@ static const uint8_t jump_table[] =
>        REF (form_unknown)        /* for 'I' */				      \
>      };									      \
>      /* Step 3a: after processing first 'h' modifier.  */		      \
> -    static JUMP_TABLE_TYPE step3a_jumps[30] =				      \
> +    static JUMP_TABLE_TYPE step3a_jumps[31] =				      \
>      {									      \
>        REF (form_unknown),						      \
>        REF (form_unknown),	/* for ' ' */				      \
> @@ -392,6 +395,7 @@ static const uint8_t jump_table[] =
>        REF (form_unsigned),	/* for 'u' */				      \
>        REF (form_octal),		/* for 'o' */				      \
>        REF (form_hexa),		/* for 'X', 'x' */			      \
> +      REF (form_roman),		/* for 'R', 'r' */			      \
>        REF (form_unknown),	/* for 'E', 'e', 'F', 'f', 'G', 'g' */	      \
>        REF (form_unknown),	/* for 'c' */				      \
>        REF (form_unknown),	/* for 's', 'S' */			      \
> @@ -405,7 +409,7 @@ static const uint8_t jump_table[] =
>        REF (form_unknown)        /* for 'I' */				      \
>      };									      \
>      /* Step 3b: after processing first 'l' modifier.  */		      \
> -    static JUMP_TABLE_TYPE step3b_jumps[30] =				      \
> +    static JUMP_TABLE_TYPE step3b_jumps[31] =				      \
>      {									      \
>        REF (form_unknown),						      \
>        REF (form_unknown),	/* for ' ' */				      \
> @@ -426,6 +430,7 @@ static const uint8_t jump_table[] =
>        REF (form_unsigned),	/* for 'u' */				      \
>        REF (form_octal),		/* for 'o' */				      \
>        REF (form_hexa),		/* for 'X', 'x' */			      \
> +      REF (form_roman),		/* for 'R', 'r' */			      \
>        REF (form_float),		/* for 'E', 'e', 'F', 'f', 'G', 'g' */	      \
>        REF (form_character),	/* for 'c' */				      \
>        REF (form_string),	/* for 's', 'S' */			      \
> @@ -441,7 +446,7 @@ static const uint8_t jump_table[] =
>  
>  #define STEP4_TABLE							      \
>      /* Step 4: processing format specifier.  */				      \
> -    static JUMP_TABLE_TYPE step4_jumps[30] =				      \
> +    static JUMP_TABLE_TYPE step4_jumps[31] =				      \
>      {									      \
>        REF (form_unknown),						      \
>        REF (form_unknown),	/* for ' ' */				      \
> @@ -462,6 +467,7 @@ static const uint8_t jump_table[] =
>        REF (form_unsigned),	/* for 'u' */				      \
>        REF (form_octal),		/* for 'o' */				      \
>        REF (form_hexa),		/* for 'X', 'x' */			      \
> +      REF (form_roman),		/* for 'R', 'r' */			      \
>        REF (form_float),		/* for 'E', 'e', 'F', 'f', 'G', 'g' */	      \
>        REF (form_character),	/* for 'c' */				      \
>        REF (form_string),	/* for 's', 'S' */			      \
> @@ -741,6 +747,11 @@ static const uint8_t jump_table[] =
>  	  break;							      \
>  	}								      \
>  									      \
> +    LABEL (form_roman):							      \
> +      /* Ancient Roman / Latin number. */				      \
> +      roman(s, va_arg (ap, int), spec == L_('R'));			      \
> +      break;							              \
> +									      \
>      LABEL (form_float):							      \
>        {									      \
>  	/* Floating-point number.  This is handled by printf_fp.c.  */	      \
> @@ -1210,6 +1221,72 @@ static const uint8_t jump_table[] =
>        break;
>  #endif
>  
> +static const int numbers[] = { 1000, 900, 500, 400, 100, 90,
> +			       50, 40, 10, 9, 5, 4, 1 };
> +static const CHAR_T *_rul[] = { L_("M"), L_("CM"), L_("D"), L_("CD"), L_("C"),
> +			      L_("XC"), L_("L"), L_("XL"), L_("X"),
> +			      L_("IX"), L_("V"), L_("IV"), L_("I") };
> +static const CHAR_T *_rll[] = { L_("m"), L_("cm"), L_("d"), L_("cd"), L_("c"),
> +			      L_("xc"), L_("l"), L_("xl"), L_("x"),
> +			      L_("ix"), L_("v"), L_("iv"), L_("i") };
> +
> +static void roman(FILE *s, long int num, int upper_case)
> +{
> +  /* used by outchar */
> +  int done = 0;
> +
> +  const CHAR_T **letters = upper_case ? _rul : _rll;
> +
> +  if(!num)
> +    {
> +      outstring(L_("(nihil)"), 7);
> +      return;
> +    }
> +
> +  if(num < 0)
> +    {
> +      outchar(L_('-'));
> +      num = -num;
> +    }
> +
> +  if(num > 499999999)
> +    {
> +      outstring(L_("(infinitum)"), 11);
> +      return;
> +    }
> +
> +  if(num > 99999)
> +    {
> +      outchar(L_('|'));
> +      roman(s, num / 100000, upper_case);
> +      outchar(L_('|'));
> +      num %= 100000;
> +    }
> +
> +  if(num > 4999)
> +    {
> +      outchar(L_('_'));
> +      roman(s, num / 1000, upper_case);
> +      outchar(L_('_'));
> +      num %= 1000;
> +    }
> +
> +  for (int i = 0; i < sizeof(numbers) / sizeof(*numbers); i++)
> +    {
> +      while (num >= numbers[i])
> +	{
> +	  outchar(letters[i][0]);
> +	  if(letters[i][1])
> +	  outchar(letters[i][1]);
> +	  num -= numbers[i];
> +	}
> +    }
> +
> +  /* suppress warnings */
> +  all_done:
> +  return;
> +}
> +
>  /* Helper function to provide temporary buffering for unbuffered streams.  */
>  static int buffered_vfprintf (FILE *stream, const CHAR_T *fmt, va_list)
>       __THROW __attribute__ ((noinline)) internal_function;
>
References:
- [PATCH] Add Roman numerals in *printf
  - From: Matteo Croce
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]