This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC][PATCH 4/7] Use the alternative (genitive case) month names (bug 10871)


Some languages (Slavic, Baltic, etc.) require a genitive case of the
month name when formatting a full date (with the day number) while
they require a nominative case when referring to the month standalone.

strftime() now analyses the context where the month name appears
in the format string and tries to choose whether the basic (nominative)
or alternative (genitive) month name should be used.

strptime() now accepts both nominative and genitive month names.

2016-01-08  Rafal Luzynski  <digitalfreak@lingonborough.com>
	[BZ #10871]
	* time/strftime_l.c: alternative (genitive) month names supported, a smart
	  algorithm decides which form is correct in the current context.
	* time/strptime_l.c: alternative (genitive) month names also recognized.
---
 time/strftime_l.c | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 time/strptime_l.c |  25 +++++++++
 2 files changed, 179 insertions(+), 1 deletion(-)

diff --git a/time/strftime_l.c b/time/strftime_l.c
index 1205035..c3ab832 100644
--- a/time/strftime_l.c
+++ b/time/strftime_l.c
@@ -72,6 +72,7 @@ extern char *tzname[];
 #  define mbstate_t int
 #  define mbrlen(s, n, ps) mblen (s, n)
 #  define mbsinit(ps) (*(ps) == 0)
+#  define mbrtowc(pwc, s, n, ps) mbtowc (pwc, s, n)
 # endif
   static const mbstate_t mbstate_zero;
 #endif
@@ -432,6 +433,125 @@ static CHAR_T const month_name[][10] =
 # define ut 0
 #endif
 
+/* Return true if the text at S looks like a suffix glued to the month
+   name: a letter (in the current locale), optionally preceded by any run
+   of punctuation characters (a dash, an apostrophe, a literal "%%", etc.).
+   Return false if a format specifier or anything else comes first.  If a
+   suffix follows the month name then the caller is probably already
+   working around the missing genitive forms, so we should not interfere
+   with that.  */
+static bool is_suffix (const CHAR_T *s)
+{
+#if defined COMPILE_WIDE || DO_MULTIBYTE
+# define ISALPHA(ch) iswalpha (ch)
+# define ISPUNCT(ch) iswpunct (ch)
+#else
+# define ISALPHA(ch) isalpha ((unsigned char) (ch))
+# define ISPUNCT(ch) ispunct ((unsigned char) (ch))
+#endif
+
+#if DO_MULTIBYTE && !defined COMPILE_WIDE
+  wchar_t c, next_char;
+  size_t mb_char_len, mb_next_char_len;
+  mbstate_t mbstate = mbstate_zero;
+  /* Decode the first character; a decoding failure counts as the end.  */
+  mb_char_len = mbrtowc (&c, s, strlen (s), &mbstate);
+  if ((ssize_t) mb_char_len <= 0)
+    c = L_('\0');
+
+  if (c == L_('\0'))
+    {
+      next_char = L_('\0');
+      mb_next_char_len = 0;
+    }
+  else
+    {
+      /* MBSTATE carries over: the next character continues the string.  */
+      mb_next_char_len = mbrtowc (&next_char, s + mb_char_len,
+				  strlen (s + mb_char_len), &mbstate);
+      if ((ssize_t) mb_next_char_len <= 0)
+        next_char = L_('\0');
+    }
+# define NEXT_CHAR_OFFSET mb_char_len
+# define NEXT_NEXT_CHAR_OFFSET (mb_char_len + mb_next_char_len)
+#else
+  const CHAR_T c = s[0];
+  const CHAR_T next_char = c == L_('\0') ? L_('\0') : s[1];
+# define NEXT_CHAR_OFFSET 1
+# define NEXT_NEXT_CHAR_OFFSET 2
+#endif
+
+  if (ISALPHA (c))
+    return true;
+  /* "%%" is a literal percent sign: skip it; "%x" is a format specifier.  */
+  if (c == L_('%'))
+    {
+      if (next_char == L_('%'))
+	return is_suffix (s + NEXT_NEXT_CHAR_OFFSET);
+      else
+	return false;
+    }
+  /* Skip one punctuation character and look at what follows it.  */
+  if (ISPUNCT (c))
+    return is_suffix (s + NEXT_CHAR_OFFSET);
+
+  return false;
+}
+
+/* Check if the nearest format specifier in the format string pointed
+   to by S is a day of the month specifier: "%e" or "%d".  The format
+   specifier may be preceded by any number of other characters (including
+   literal "%%" pairs) and may contain modifiers.  This function does not
+   check if the modifiers are valid and actually produce the day number,
+   so the result may sometimes be wrong, but in these cases the output
+   will be wrong anyway because the format is wrong.  */
+static bool next_is_day (const CHAR_T *s)
+{
+  /* Skip until a percent sign that starts a format specifier is found.  */
+  while (1)
+    {
+      switch (*s++)
+	{
+	case L_('\0'):
+	    return false;
+	case L_('%'):
+	    if (*s == L_('%'))
+	      {
+		++s;		/* Literal "%%": keep searching.  */
+		continue;
+	      }
+	    break;
+	default:
+	    continue;
+	}
+      break;
+    }
+  /* S now points just past the '%'; examine the specifier itself.  */
+  while (1)
+    {
+      switch (*s++)
+	{
+	/* Skip the modifiers.  Here we don't check if they are valid.  */
+	case L_('_'):
+	case L_('-'):
+	case L_('^'):
+	case L_('#'):
+	case L_('E'): case L_('O'):
+	case L_('0'): case L_('1'): case L_('2'): case L_('3'): case L_('4'):
+	case L_('5'): case L_('6'): case L_('7'): case L_('8'): case L_('9'):
+	    continue;
+
+	/* Day found.  */
+	case L_('d'): case L_('e'):
+	    return true;
+
+	/* Something else found, including the terminating zero.  */
+	default:
+	    return false;
+	}
+    }
+}
+
 static size_t __strftime_internal (CHAR_T *, size_t, const CHAR_T *,
 				   const struct tm *, bool *
 				   ut_argument_spec
@@ -492,6 +612,9 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 # define f_month \
   ((const CHAR_T *) (tp->tm_mon < 0 || tp->tm_mon > 11			     \
 		     ? "?" : _NL_CURRENT (LC_TIME, NLW(MON_1) + tp->tm_mon)))
+# define f_altmonth \
+  ((const CHAR_T *) (tp->tm_mon < 0 || tp->tm_mon > 11			     \
+		     ? "?" : _NL_CURRENT (LC_TIME, NLW(ALTMON_1) + tp->tm_mon)))
 # define ampm \
   ((const CHAR_T *) _NL_CURRENT (LC_TIME, tp->tm_hour > 11		      \
 				 ? NLW(PM_STR) : NLW(AM_STR)))
@@ -499,6 +622,8 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 # define aw_len STRLEN (a_wkday)
 # define am_len STRLEN (a_month)
 # define ap_len STRLEN (ampm)
+# define day_month_order \
+  (current->values[_NL_ITEM_INDEX (_NL_DAY_MONTH_ORDER)].string[0])
 #else
 # if !HAVE_STRFTIME
 #  define f_wkday (tp->tm_wday < 0 || tp->tm_wday > 6	\
@@ -507,13 +632,16 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 		   ? "?" : month_name[tp->tm_mon])
 #  define a_wkday f_wkday
 #  define a_month f_month
+#  define f_altmonth (L_(""))
 #  define ampm (L_("AMPM") + 2 * (tp->tm_hour > 11))
 
   size_t aw_len = 3;
   size_t am_len = 3;
   size_t ap_len = 2;
+# define day_month_order 2
 # endif
 #endif
+  int last_was_day = 0;
   const char *zone;
   size_t i = 0;
   CHAR_T *p = s;
@@ -783,7 +911,20 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	      to_lowcase = 0;
 	    }
 #if defined _NL_CURRENT || !HAVE_STRFTIME
-	  cpy (STRLEN (f_month), f_month);
+	  /* Use f_altmonth only if f_altmonth is provided.  */
+	  if (f_altmonth[0]
+	  /* Don't use f_altmonth if there is a suffix after %B because
+	     it means that a caller already provides its own workaround.  */
+	      && !is_suffix (f + 1)
+	  /* Use f_altmonth if the day-month order is valid and the last
+	     format specifier was a day specifier.  */
+	      && ( (day_month_order <= 2 && last_was_day)
+	  /* Use f_altmonth if the month-day order is valid and the next
+	     format specifier will be a day specifier.  */
+		  || (day_month_order >= 2 && next_is_day (f + 1)) ))
+	    cpy (STRLEN (f_altmonth), f_altmonth);
+	  else
+	    cpy (STRLEN (f_month), f_month);
 	  break;
 #else
 	  goto underlying_strftime;
@@ -1400,6 +1541,18 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	  }
 	  break;
 	}
+
+      /* Update the last_was_day flag.  */
+      switch (format_char)
+	{
+	case L_('d'):
+	case L_('e'):
+	  last_was_day = 1;
+	  break;
+	default:
+	  last_was_day = 0;
+	  break;
+	}
     }
 
   if (p && maxsize != 0)
diff --git a/time/strptime_l.c b/time/strptime_l.c
index 3a56947..9583211 100644
--- a/time/strptime_l.c
+++ b/time/strptime_l.c
@@ -124,6 +124,8 @@ extern const struct __locale_data _nl_C_LC_TIME attribute_hidden;
   (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (ABDAY_1)].string)
 # define month_name (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (MON_1)].string)
 # define ab_month_name (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (ABMON_1)].string)
+# define alt_month_name \
+  (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (ALTMON_1)].string)
 # define HERE_D_T_FMT (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (D_T_FMT)].string)
 # define HERE_D_FMT (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (D_FMT)].string)
 # define HERE_AM_STR (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (AM_STR)].string)
@@ -153,6 +155,11 @@ static char const ab_month_name[][4] =
     "Jan", "Feb", "Mar", "Apr", "May", "Jun",
     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
   };
+static char const alt_month_name[][10] =
+  {
+    "January", "February", "March", "April", "May", "June",
+    "July", "August", "September", "October", "November", "December"
+  };
 # define HERE_D_T_FMT "%a %b %e %H:%M:%S %Y"
 # define HERE_D_FMT "%m/%d/%y"
 # define HERE_AM_STR "AM"
@@ -403,6 +410,20 @@ __strptime_internal (const char *rp, const char *fmt, struct tm *tmp,
 	      if (s.decided !=raw)
 		{
 		  trp = rp;
+		  /* First check if the alt month is provided.  */
+		  if (_NL_CURRENT (LC_TIME, ALTMON_1 + cnt)
+		      && * (_NL_CURRENT (LC_TIME, ALTMON_1 + cnt))
+		      && match_string (_NL_CURRENT (LC_TIME, ALTMON_1 + cnt), trp)
+		      && trp > rp_longest)
+		    {
+		      rp_longest = trp;
+		      cnt_longest = cnt;
+		      if (s.decided == not
+			  && strcmp (_NL_CURRENT (LC_TIME, ALTMON_1 + cnt),
+				     alt_month_name[cnt]))
+			decided_longest = loc;
+		    }
+		  trp = rp;
 		  if (match_string (_NL_CURRENT (LC_TIME, MON_1 + cnt), trp)
 		      && trp > rp_longest)
 		    {
@@ -429,6 +450,10 @@ __strptime_internal (const char *rp, const char *fmt, struct tm *tmp,
 	      if (s.decided != loc
 		  && (((trp = rp, match_string (month_name[cnt], trp))
 		       && trp > rp_longest)
+		      || ((trp = rp, alt_month_name[cnt]
+			  && alt_month_name[cnt][0]
+			  && match_string (alt_month_name[cnt], trp))
+			  && trp > rp_longest)
 		      || ((trp = rp, match_string (ab_month_name[cnt], trp))
 			  && trp > rp_longest)))
 		{
-- 
2.5.0


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]