This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH v12 1/6] Implement alternative month names (bug 10871).


Some languages (Slavic, Baltic, etc.) require a genitive case of the
month name when formatting a full date (with the day number) while
they require a nominative case when referring to the month standalone.
This requirement cannot be fulfilled without providing two forms for
each month name.  From now it is specified that nl_langinfo(MON_1)
series (up to MON_12) and strftime("%B") generate the month names in
the grammatical form used when the month is a part of a complete date.
If the grammatical form used when the month is named by itself is needed,
the new values nl_langinfo(ALTMON_1) (up to ALTMON_12) and
strftime("%OB") are supported.  This new feature is optional so the
languages which do not need it or do not yet provide the updated
locales simply do not use it and their behaviour is unchanged.

	[BZ #10871]
	* locale/C-time.c (_nl_C_LC_TIME): Add alternative month names,
	define them as the same as primary full month names explicitly.
	* locale/categories.def (LC_TIME): Add alt_mon and wide-alt_mon.
	* locale/langinfo.h (__ALTMON_1, __ALTMON_2, __ALTMON_3, __ALTMON_4,
	__ALTMON_5, __ALTMON_6, __ALTMON_7, __ALTMON_8, __ALTMON_9, __ALTMON_10,
	__ALTMON_11, __ALTMON_12, _NL_WALTMON_1, _NL_WALTMON_2, _NL_WALTMON_3,
	_NL_WALTMON_4, _NL_WALTMON_5, _NL_WALTMON_6, _NL_WALTMON_7,
	_NL_WALTMON_8, _NL_WALTMON_9, _NL_WALTMON_10, _NL_WALTMON_11,
	_NL_WALTMON_12): New enum constants.
	[__USE_GNU] (ALTMON_1, ALTMON_2, ALTMON_3, ALTMON_4, ALTMON_5, ALTMON_6,
	ALTMON_7, ALTMON_8, ALTMON_9, ALTMON_10, ALTMON_11, ALTMON_12): New
	macros.
	* locale/programs/ld-time.c (struct locale_time_t): Add alt_mon,
	walt_mon, and alt_mon_defined members.
	(time_output): Output alt_mon and walt_mon members.
	(time_read): Read them, initialize them as copies of mon and wmon
	respectively if they are missing, initialize alt_mon_defined.
	* locale/programs/locfile-kw.gperf (alt_mon): Define.
	* locale/programs/locfile-token.h (tok_alt_mon): New enum constant.
	* localedata/tst-langinfo.c (map): Add tests for the new constants
	ALTMON_1 .. ALTMON_12.
	* time/Makefile [$(run-built-tests) = yes] (LOCALES): Add fr_FR.UTF-8
	and pl_PL.UTF-8.
	* time/strftime_l.c (f_altmonth): New macro.
	(__strftime_internal): Handle %OB format.
	* time/strptime_l.c [_LIBC] (alt_month_name): New macro.
	(__strptime_internal): Handle %OB format.
	* time/tst-strptime.c (day_tests): Add tests to parse different forms
	of month names including the new %OB format specifier.
---
 locale/C-time.c                  | 28 ++++++++++++++++++++--
 locale/categories.def            |  2 ++
 locale/langinfo.h                | 50 ++++++++++++++++++++++++++++++++++++++--
 locale/programs/ld-time.c        | 21 +++++++++++++++++
 locale/programs/locfile-kw.gperf |  1 +
 locale/programs/locfile-token.h  |  1 +
 localedata/tst-langinfo.c        | 12 ++++++++++
 time/Makefile                    |  3 ++-
 time/strftime_l.c                | 11 +++++++--
 time/strptime_l.c                | 32 +++++++++++++++++++++----
 time/tst-strptime.c              | 12 ++++++++++
 11 files changed, 162 insertions(+), 11 deletions(-)

diff --git a/locale/C-time.c b/locale/C-time.c
index 1f1ee01..73bc700 100644
--- a/locale/C-time.c
+++ b/locale/C-time.c
@@ -30,7 +30,7 @@ const struct __locale_data _nl_C_LC_TIME attribute_hidden =
   { NULL, },			/* no cached data */
   UNDELETABLE,
   0,
-  111,
+  135,
   {
     { .string = "Sun" },
     { .string = "Mon" },
@@ -142,6 +142,30 @@ const struct __locale_data _nl_C_LC_TIME attribute_hidden =
     { .string = "" },
     { .string = "%a %b %e %H:%M:%S %Z %Y" },
     { .wstr = (const uint32_t *) L"%a %b %e %H:%M:%S %Z %Y" },
-    { .string = _nl_C_codeset }
+    { .string = _nl_C_codeset },
+    { .string = "January" },
+    { .string = "February" },
+    { .string = "March" },
+    { .string = "April" },
+    { .string = "May" },
+    { .string = "June" },
+    { .string = "July" },
+    { .string = "August" },
+    { .string = "September" },
+    { .string = "October" },
+    { .string = "November" },
+    { .string = "December" },
+    { .wstr = (const uint32_t *) L"January" },
+    { .wstr = (const uint32_t *) L"February" },
+    { .wstr = (const uint32_t *) L"March" },
+    { .wstr = (const uint32_t *) L"April" },
+    { .wstr = (const uint32_t *) L"May" },
+    { .wstr = (const uint32_t *) L"June" },
+    { .wstr = (const uint32_t *) L"July" },
+    { .wstr = (const uint32_t *) L"August" },
+    { .wstr = (const uint32_t *) L"September" },
+    { .wstr = (const uint32_t *) L"October" },
+    { .wstr = (const uint32_t *) L"November" },
+    { .wstr = (const uint32_t *) L"December" }
   }
 };
diff --git a/locale/categories.def b/locale/categories.def
index 47947f7..3cbb4e6 100644
--- a/locale/categories.def
+++ b/locale/categories.def
@@ -249,6 +249,8 @@ DEFINE_CATEGORY
   DEFINE_ELEMENT (_DATE_FMT,                "date_fmt",            opt, string)
   DEFINE_ELEMENT (_NL_W_DATE_FMT,           "wide-date_fmt",       opt,
wstring)
   DEFINE_ELEMENT (_NL_TIME_CODESET,	    "time-codeset",	   std, string)
+  DEFINE_ELEMENT (ALTMON_1,       "alt_mon",       opt, stringarray, 12, 12)
+  DEFINE_ELEMENT (_NL_WALTMON_1,  "wide-alt_mon",  opt, wstringarray, 12, 12)
   ), NO_POSTLOAD)
 
 
diff --git a/locale/langinfo.h b/locale/langinfo.h
index 28a0a73..65374dd 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -100,7 +100,8 @@ enum
   ABMON_12,
 #define ABMON_12		ABMON_12
 
-  /* Long month names.  */
+  /* Long month names, in the grammatical form used when the month
+     is a part of a complete date.  */
   MON_1,			/* January */
 #define MON_1			MON_1
   MON_2,
@@ -189,7 +190,8 @@ enum
   _NL_WABMON_11,
   _NL_WABMON_12,
 
-  /* Long month names.  */
+  /* Long month names, in the grammatical form used when the month
+     is a part of a complete date.  */
   _NL_WMON_1,		/* January */
   _NL_WMON_2,
   _NL_WMON_3,
@@ -231,6 +233,50 @@ enum
 
   _NL_TIME_CODESET,
 
+  /* Long month names, in the grammatical form used when the month
+     is named by itself.  */
+  __ALTMON_1,			/* January */
+  __ALTMON_2,
+  __ALTMON_3,
+  __ALTMON_4,
+  __ALTMON_5,
+  __ALTMON_6,
+  __ALTMON_7,
+  __ALTMON_8,
+  __ALTMON_9,
+  __ALTMON_10,
+  __ALTMON_11,
+  __ALTMON_12,
+#ifdef __USE_GNU
+# define ALTMON_1		__ALTMON_1
+# define ALTMON_2		__ALTMON_2
+# define ALTMON_3		__ALTMON_3
+# define ALTMON_4		__ALTMON_4
+# define ALTMON_5		__ALTMON_5
+# define ALTMON_6		__ALTMON_6
+# define ALTMON_7		__ALTMON_7
+# define ALTMON_8		__ALTMON_8
+# define ALTMON_9		__ALTMON_9
+# define ALTMON_10		__ALTMON_10
+# define ALTMON_11		__ALTMON_11
+# define ALTMON_12		__ALTMON_12
+#endif
+
+  /* Long month names, in the grammatical form used when the month
+     is named by itself.  */
+  _NL_WALTMON_1,			/* January */
+  _NL_WALTMON_2,
+  _NL_WALTMON_3,
+  _NL_WALTMON_4,
+  _NL_WALTMON_5,
+  _NL_WALTMON_6,
+  _NL_WALTMON_7,
+  _NL_WALTMON_8,
+  _NL_WALTMON_9,
+  _NL_WALTMON_10,
+  _NL_WALTMON_11,
+  _NL_WALTMON_12,
+
   _NL_NUM_LC_TIME,	/* Number of indices in LC_TIME category.  */
 
   /* LC_COLLATE category: text sorting.
diff --git a/locale/programs/ld-time.c b/locale/programs/ld-time.c
index 67d055a..4186448 100644
--- a/locale/programs/ld-time.c
+++ b/locale/programs/ld-time.c
@@ -91,6 +91,9 @@ struct locale_time_t
   const char *date_fmt;
   const uint32_t *wdate_fmt;
   int alt_digits_defined;
+  const char *alt_mon[12];
+  const uint32_t *walt_mon[12];
+  int alt_mon_defined;
   unsigned char week_ndays;
   uint32_t week_1stday;
   unsigned char week_1stweek;
@@ -639,6 +642,15 @@ time_output (struct localedef_t *locale, const struct
charmap_t *charmap,
   add_locale_string (&file, time->date_fmt);
   add_locale_wstring (&file, time->wdate_fmt);
   add_locale_string (&file, charmap->code_set_name);
+
+  /* The alt'mons.  */
+  for (n = 0; n < 12; ++n)
+    add_locale_string (&file, time->alt_mon[n] ?: "");
+
+  /* The wide character alt'mons.  */
+  for (n = 0; n < 12; ++n)
+    add_locale_wstring (&file, time->walt_mon[n] ?: empty_wstr);
+
   write_locale_data (output_path, LC_TIME, "LC_TIME", &file);
 }
 
@@ -782,6 +794,7 @@ time_read (struct linereader *ldfile, struct localedef_t
*result,
 	  STRARR_ELEM (mon, 12, 12);
 	  STRARR_ELEM (am_pm, 2, 2);
 	  STRARR_ELEM (alt_digits, 0, 100);
+	  STRARR_ELEM (alt_mon, 12, 12);
 
 	case tok_era:
 	  /* Ignore the rest of the line if we don't need the input of
@@ -934,6 +947,14 @@ time_read (struct linereader *ldfile, struct localedef_t
*result,
 	    lr_error (ldfile, _("\
 %1$s: definition does not end with `END %1$s'"), "LC_TIME");
 	  lr_ignore_rest (ldfile, now->tok == tok_lc_time);
+
+	  /* If alt_mon was not specified, make it a copy of mon.  */
+	  if (!ignore_content && !time->alt_mon_defined)
+	    {
+	      memcpy (time->alt_mon, time->mon, sizeof (time->mon));
+	      memcpy (time->walt_mon, time->wmon, sizeof (time->wmon));
+	      time->alt_mon_defined = 1;
+	    }
 	  return;
 
 	default:
diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf
index c74c1f2..dad7f21 100644
--- a/locale/programs/locfile-kw.gperf
+++ b/locale/programs/locfile-kw.gperf
@@ -148,6 +148,7 @@ first_workday,          tok_first_workday,          0
 cal_direction,          tok_cal_direction,          0
 timezone,               tok_timezone,               0
 date_fmt,               tok_date_fmt,               0
+alt_mon,                tok_alt_mon,                0
 LC_MESSAGES,            tok_lc_messages,            0
 yesexpr,                tok_yesexpr,                0
 noexpr,                 tok_noexpr,                 0
diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h
index f02325d..d49da5e 100644
--- a/locale/programs/locfile-token.h
+++ b/locale/programs/locfile-token.h
@@ -186,6 +186,7 @@ enum token_t
   tok_cal_direction,
   tok_timezone,
   tok_date_fmt,
+  tok_alt_mon,
   tok_lc_messages,
   tok_yesexpr,
   tok_noexpr,
diff --git a/localedata/tst-langinfo.c b/localedata/tst-langinfo.c
index 8c3667c..0d33e75 100644
--- a/localedata/tst-langinfo.c
+++ b/localedata/tst-langinfo.c
@@ -50,6 +50,18 @@ struct map
   VAL (ABMON_8),
   VAL (ABMON_9),
   VAL (ALT_DIGITS),
+  VAL (ALTMON_1),
+  VAL (ALTMON_10),
+  VAL (ALTMON_11),
+  VAL (ALTMON_12),
+  VAL (ALTMON_2),
+  VAL (ALTMON_3),
+  VAL (ALTMON_4),
+  VAL (ALTMON_5),
+  VAL (ALTMON_6),
+  VAL (ALTMON_7),
+  VAL (ALTMON_8),
+  VAL (ALTMON_9),
   VAL (AM_STR),
   VAL (CRNCYSTR),
   VAL (CURRENCY_SYMBOL),
diff --git a/time/Makefile b/time/Makefile
index 264eed9..2deb025 100644
--- a/time/Makefile
+++ b/time/Makefile
@@ -48,7 +48,8 @@ tests	:= test_time clocktest tst-posixtz tst-strptime
tst_wcsftime \
 include ../Rules
 
 ifeq ($(run-built-tests),yes)
-LOCALES := de_DE.ISO-8859-1 en_US.ISO-8859-1 ja_JP.EUC-JP
+LOCALES := de_DE.ISO-8859-1 en_US.ISO-8859-1 ja_JP.EUC-JP fr_FR.UTF-8 \
+	   pl_PL.UTF-8
 include ../gen-locales.mk
 
 $(objpfx)tst-ftime_l.out: $(gen-locales)
diff --git a/time/strftime_l.c b/time/strftime_l.c
index 18651ff..ac5d28f 100644
--- a/time/strftime_l.c
+++ b/time/strftime_l.c
@@ -492,6 +492,9 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T
*format,
 # define f_month \
   ((const CHAR_T *) (tp->tm_mon < 0 || tp->tm_mon > 11			     \
 		     ? "?" : _NL_CURRENT (LC_TIME, NLW(MON_1) + tp->tm_mon)))
+# define f_altmonth \
+  ((const CHAR_T *) (tp->tm_mon < 0 || tp->tm_mon > 11			     \
+		     ? "?" : _NL_CURRENT (LC_TIME, NLW(ALTMON_1) + tp->tm_mon)))
 # define ampm \
   ((const CHAR_T *) _NL_CURRENT (LC_TIME, tp->tm_hour > 11		      \
 				 ? NLW(PM_STR) : NLW(AM_STR)))
@@ -507,6 +510,7 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T
*format,
 		   ? "?" : month_name[tp->tm_mon])
 #  define a_wkday f_wkday
 #  define a_month f_month
+#  define f_altmonth f_month
 #  define ampm (L_("AMPM") + 2 * (tp->tm_hour > 11))
 
   size_t aw_len = 3;
@@ -785,7 +789,7 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T
*format,
 #endif
 
 	case L_('B'):
-	  if (modifier != 0)
+	  if (modifier == L_('E'))
 	    goto bad_format;
 	  if (change_case)
 	    {
@@ -793,7 +797,10 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const
CHAR_T *format,
 	      to_lowcase = 0;
 	    }
 #if defined _NL_CURRENT || !HAVE_STRFTIME
-	  cpy (STRLEN (f_month), f_month);
+	  if (modifier == L_('O'))
+	    cpy (STRLEN (f_altmonth), f_altmonth);
+	  else
+	    cpy (STRLEN (f_month), f_month);
 	  break;
 #else
 	  goto underlying_strftime;
diff --git a/time/strptime_l.c b/time/strptime_l.c
index 7d4758e..39cf38d 100644
--- a/time/strptime_l.c
+++ b/time/strptime_l.c
@@ -124,6 +124,8 @@ extern const struct __locale_data _nl_C_LC_TIME
attribute_hidden;
   (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (ABDAY_1)].string)
 # define month_name (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (MON_1)].string)
 # define ab_month_name (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (ABMON_1)].string)
+# define alt_month_name \
+  (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (ALTMON_1)].string)
 # define HERE_D_T_FMT (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (D_T_FMT)].string)
 # define HERE_D_FMT (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (D_FMT)].string)
 # define HERE_AM_STR (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (AM_STR)].string)
@@ -319,10 +321,9 @@ __strptime_internal (const char *rp, const char *fmt,
struct tm *tmp,
       while (*fmt >= '0' && *fmt <= '9')
 	++fmt;
 
-#ifndef _NL_CURRENT
-      /* We need this for handling the `E' modifier.  */
+      /* In some cases, modifiers are handled by adjusting state and
+         then restarting the switch statement below.  */
     start_over:
-#endif
 
       /* Make back up of current processing pointer.  */
       rp_backup = rp;
@@ -423,13 +424,32 @@ __strptime_internal (const char *rp, const char *fmt,
struct tm *tmp,
 				     ab_month_name[cnt]))
 			decided_longest = loc;
 		    }
+#ifdef _LIBC
+		  /* Now check the alt month.  */
+		  trp = rp;
+		  if (match_string (_NL_CURRENT (LC_TIME, ALTMON_1 + cnt), trp)
+		      && trp > rp_longest)
+		    {
+		      rp_longest = trp;
+		      cnt_longest = cnt;
+		      if (s.decided == not
+			  && strcmp (_NL_CURRENT (LC_TIME, ALTMON_1 + cnt),
+				     alt_month_name[cnt]))
+			decided_longest = loc;
+		    }
+#endif
 		}
 #endif
 	      if (s.decided != loc
 		  && (((trp = rp, match_string (month_name[cnt], trp))
 		       && trp > rp_longest)
 		      || ((trp = rp, match_string (ab_month_name[cnt], trp))
-			  && trp > rp_longest)))
+			  && trp > rp_longest)
+#ifdef _LIBC
+		      || ((trp = rp, match_string (alt_month_name[cnt], trp))
+			  && trp > rp_longest)
+#endif
+	      ))
 		{
 		  rp_longest = trp;
 		  cnt_longest = cnt;
@@ -1015,6 +1035,10 @@ __strptime_internal (const char *rp, const char *fmt,
struct tm *tmp,
 	case 'O':
 	  switch (*fmt++)
 	    {
+	    case 'B':
+	      /* Match month name.  Reprocess as plain 'B'.  */
+	      fmt--;
+	      goto start_over;
 	    case 'd':
 	    case 'e':
 	      /* Match day of month using alternate numeric symbols.  */
diff --git a/time/tst-strptime.c b/time/tst-strptime.c
index 34ad797..62ecb7c 100644
--- a/time/tst-strptime.c
+++ b/time/tst-strptime.c
@@ -51,6 +51,18 @@ static const struct
     6, 0, 0, 1 },
   { "ja_JP.EUC-JP", "2001 20 \xb7\xee", "%Y %U %a", 1, 140, 4, 21 },
   { "ja_JP.EUC-JP", "2001 21 \xb7\xee", "%Y %W %a", 1, 140, 4, 21 },
+  /* Most of the languages do not need the declension of the month names
+     and do not distinguish between %B and %OB.  */
+  { "en_US.ISO-8859-1", "November 17, 2017", "%B %e, %Y", 5, 320, 10, 17 },
+  { "de_DE.ISO-8859-1", "18. Nov 2017", "%d. %b %Y", 6, 321, 10, 18 },
+  { "fr_FR.UTF-8", "19 novembre 2017", "%d %OB %Y", 0, 322, 10, 19 },
+  /* Some languages do need the declension of the month names.  */
+  { "pl_PL.UTF-8", "21 lis 2017", "%d %b %Y", 2, 324, 10, 21 },
+  { "pl_PL.UTF-8", "22 LIS 2017", "%d %B %Y", 3, 325, 10, 22 },
+  /* TODO: Use the genitive case here as soon as it is added to localedata.  */
+  { "pl_PL.UTF-8", "23 listopad 2017", "%d %B %Y", 4, 326, 10, 23 },
+  /* The nominative case is incorrect here but it is parseable.  */
+  { "pl_PL.UTF-8", "24 listopad 2017", "%d %OB %Y", 5, 327, 10, 24 },
 };
 
 
-- 
2.7.5


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]