This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH v11 3/5] Abbreviated alternative month names (%Ob) also added (bug 10871).
On 01/08/2018 04:01 PM, Rafal Luzynski wrote:
> All the previous changes are also repeated to support abbreviated
> alternative month names. In most languages which have declension and
> need nominative/genitive month names the abbreviated forms for both
> cases are the same. An example where they do differ is May in Russian:
> this name is too short to be abbreviated so even the abbreviated form
> features the declension suffixes.
>
> [BZ #10871]
> * locale/C-time.c: Add abbreviated alternative month names, define
> them as the same as abbreviated month names explicitly.
> * locale/categories.def: ab_alt_mon and wide-ab_alt_mon added.
> * locale/langinfo.h: New public symbols _NL_ABALTMON_1,
> _NL_ABALTMON_2, _NL_ABALTMON_3, _NL_ABALTMON_4, _NL_ABALTMON_5,
> _NL_ABALTMON_6, _NL_ABALTMON_7, _NL_ABALTMON_8, _NL_ABALTMON_9,
> _NL_ABALTMON_10, _NL_ABALTMON_11, _NL_ABALTMON_12,
> _NL_WABALTMON_1, _NL_WABALTMON_2, _NL_WABALTMON_3, _NL_WABALTMON_4,
> _NL_WABALTMON_5, _NL_WABALTMON_6, _NL_WABALTMON_7, _NL_WABALTMON_8,
> _NL_WABALTMON_9, _NL_WABALTMON_10, _NL_WABALTMON_11, _NL_WABALTMON_12.
> * locale/programs/ld-time.c: Abbreviated alternative month names
> support added, they are a copy of abmon if not provided
> explicitly.
> * locale/programs/locfile-kw.gperf: ab_alt_mon defined.
> * locale/programs/locfile-token.h: tok_ab_alt_mon defined.
> * time/Makefile (LOCALES): Add ru_RU.UTF-8 for tests.
> * time/strftime_l.c: %Ob (%Oh) format for abbreviated
> alternative month names added.
> * time/strptime_l.c: Abbreviated alternative month names also
> recognized.
> * time/tst-strptime.c: Add more tests to parse different forms of
> month names including the new %Ob format specifier.
Why is there no ABALTMON_* via #ifdef __USE_GNU like there is for ALTMON_*?
It is OK without them, but it seems like a useful feature is missing.
OK once the test case is changed to write the UTF-8 directly in the test case string.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
> ---
> locale/C-time.c | 28 ++++++++++++++++++++++++++--
> locale/categories.def | 6 ++++--
> locale/langinfo.h | 36 ++++++++++++++++++++++++++++++++++--
> locale/programs/ld-time.c | 19 +++++++++++++++++++
> locale/programs/locfile-kw.gperf | 1 +
> locale/programs/locfile-token.h | 1 +
> time/Makefile | 3 ++-
> time/strftime_l.c | 14 ++++++++++++--
> time/strptime_l.c | 18 ++++++++++++++++++
> time/tst-strptime.c | 13 +++++++++++++
> 10 files changed, 130 insertions(+), 9 deletions(-)
>
> diff --git a/locale/C-time.c b/locale/C-time.c
> index 73bc700..e2b3b17 100644
> --- a/locale/C-time.c
> +++ b/locale/C-time.c
> @@ -30,7 +30,7 @@ const struct __locale_data _nl_C_LC_TIME attribute_hidden =
> { NULL, }, /* no cached data */
> UNDELETABLE,
> 0,
> - 135,
> + 159,
OK.
> {
> { .string = "Sun" },
> { .string = "Mon" },
> @@ -166,6 +166,30 @@ const struct __locale_data _nl_C_LC_TIME attribute_hidden =
> { .wstr = (const uint32_t *) L"September" },
> { .wstr = (const uint32_t *) L"October" },
> { .wstr = (const uint32_t *) L"November" },
> - { .wstr = (const uint32_t *) L"December" }
> + { .wstr = (const uint32_t *) L"December" },
> + { .string = "Jan" },
> + { .string = "Feb" },
> + { .string = "Mar" },
> + { .string = "Apr" },
> + { .string = "May" },
> + { .string = "Jun" },
> + { .string = "Jul" },
> + { .string = "Aug" },
> + { .string = "Sep" },
> + { .string = "Oct" },
> + { .string = "Nov" },
> + { .string = "Dec" },
> + { .wstr = (const uint32_t *) L"Jan" },
> + { .wstr = (const uint32_t *) L"Feb" },
> + { .wstr = (const uint32_t *) L"Mar" },
> + { .wstr = (const uint32_t *) L"Apr" },
> + { .wstr = (const uint32_t *) L"May" },
> + { .wstr = (const uint32_t *) L"Jun" },
> + { .wstr = (const uint32_t *) L"Jul" },
> + { .wstr = (const uint32_t *) L"Aug" },
> + { .wstr = (const uint32_t *) L"Sep" },
> + { .wstr = (const uint32_t *) L"Oct" },
> + { .wstr = (const uint32_t *) L"Nov" },
> + { .wstr = (const uint32_t *) L"Dec" }
OK.
> }
> };
> diff --git a/locale/categories.def b/locale/categories.def
> index 3cbb4e6..56c5f88 100644
> --- a/locale/categories.def
> +++ b/locale/categories.def
> @@ -249,8 +249,10 @@ DEFINE_CATEGORY
> DEFINE_ELEMENT (_DATE_FMT, "date_fmt", opt, string)
> DEFINE_ELEMENT (_NL_W_DATE_FMT, "wide-date_fmt", opt,
> wstring)
> DEFINE_ELEMENT (_NL_TIME_CODESET, "time-codeset", std, string)
> - DEFINE_ELEMENT (ALTMON_1, "alt_mon", opt, stringarray, 12, 12)
> - DEFINE_ELEMENT (_NL_WALTMON_1, "wide-alt_mon", opt, wstringarray, 12, 12)
> + DEFINE_ELEMENT (ALTMON_1, "alt_mon", opt, stringarray, 12,
> 12)
> + DEFINE_ELEMENT (_NL_WALTMON_1, "wide-alt_mon", opt, wstringarray, 12,
> 12)
> + DEFINE_ELEMENT (_NL_ABALTMON_1, "ab_alt_mon", opt, stringarray, 12,
> 12)
> + DEFINE_ELEMENT (_NL_WABALTMON_1, "wide-ab_alt_mon", opt, wstringarray, 12,
OK.
> 12)
> ), NO_POSTLOAD)
>
>
> diff --git a/locale/langinfo.h b/locale/langinfo.h
> index 0fbd838..4749e9d 100644
> --- a/locale/langinfo.h
> +++ b/locale/langinfo.h
> @@ -74,7 +74,8 @@ enum
> DAY_7, /* Saturday */
> #define DAY_7 DAY_7
>
> - /* Abbreviated month names. */
> + /* Abbreviated month names, in the grammatical form used when the month
> + forms part of a complete date. */
OK.
> ABMON_1, /* Jan */
> #define ABMON_1 ABMON_1
> ABMON_2,
> @@ -176,7 +177,8 @@ enum
> _NL_WDAY_6, /* Friday */
> _NL_WDAY_7, /* Saturday */
>
> - /* Abbreviated month names. */
> + /* Abbreviated month names, in the grammatical form used when the month
> + forms part of a complete date. */
OK.
> _NL_WABMON_1, /* Jan */
> _NL_WABMON_2,
> _NL_WABMON_3,
> @@ -277,6 +279,36 @@ enum
> _NL_WALTMON_11,
> _NL_WALTMON_12,
>
> + /* Abbreviated month names, in the grammatical form used when the month
> + is named by itself. */
> + _NL_ABALTMON_1, /* Jan */
> + _NL_ABALTMON_2,
> + _NL_ABALTMON_3,
> + _NL_ABALTMON_4,
> + _NL_ABALTMON_5,
> + _NL_ABALTMON_6,
> + _NL_ABALTMON_7,
> + _NL_ABALTMON_8,
> + _NL_ABALTMON_9,
> + _NL_ABALTMON_10,
> + _NL_ABALTMON_11,
> + _NL_ABALTMON_12,
> +
> + /* Abbreviated month names, in the grammatical form used when the month
> + is named by itself. */
> + _NL_WABALTMON_1, /* Jan */
> + _NL_WABALTMON_2,
> + _NL_WABALTMON_3,
> + _NL_WABALTMON_4,
> + _NL_WABALTMON_5,
> + _NL_WABALTMON_6,
> + _NL_WABALTMON_7,
> + _NL_WABALTMON_8,
> + _NL_WABALTMON_9,
> + _NL_WABALTMON_10,
> + _NL_WABALTMON_11,
> + _NL_WABALTMON_12,
OK.
> +
> _NL_NUM_LC_TIME, /* Number of indices in LC_TIME category. */
>
> /* LC_COLLATE category: text sorting.
> diff --git a/locale/programs/ld-time.c b/locale/programs/ld-time.c
> index 4186448..a755792 100644
> --- a/locale/programs/ld-time.c
> +++ b/locale/programs/ld-time.c
> @@ -94,6 +94,9 @@ struct locale_time_t
> const char *alt_mon[12];
> const uint32_t *walt_mon[12];
> int alt_mon_defined;
> + const char *ab_alt_mon[12];
> + const uint32_t *wab_alt_mon[12];
> + int ab_alt_mon_defined;
OK.
> unsigned char week_ndays;
> uint32_t week_1stday;
> unsigned char week_1stweek;
> @@ -651,6 +654,14 @@ time_output (struct localedef_t *locale, const struct
> charmap_t *charmap,
> for (n = 0; n < 12; ++n)
> add_locale_wstring (&file, time->walt_mon[n] ?: empty_wstr);
>
> + /* The ab'alt'mons. */
> + for (n = 0; n < 12; ++n)
> + add_locale_string (&file, time->ab_alt_mon[n] ?: "");
> +
> + /* The wide character ab'alt'mons. */
> + for (n = 0; n < 12; ++n)
> + add_locale_wstring (&file, time->wab_alt_mon[n] ?: empty_wstr);
OK.
> +
> write_locale_data (output_path, LC_TIME, "LC_TIME", &file);
> }
>
> @@ -795,6 +806,7 @@ time_read (struct linereader *ldfile, struct localedef_t
> *result,
> STRARR_ELEM (am_pm, 2, 2);
> STRARR_ELEM (alt_digits, 0, 100);
> STRARR_ELEM (alt_mon, 12, 12);
> + STRARR_ELEM (ab_alt_mon, 12, 12);
OK.
>
> case tok_era:
> /* Ignore the rest of the line if we don't need the input of
> @@ -955,6 +967,13 @@ time_read (struct linereader *ldfile, struct localedef_t
> *result,
> memcpy (time->walt_mon, time->wmon, sizeof (time->wmon));
> time->alt_mon_defined = 1;
> }
> + /* The same for abbreviated versions. */
> + if (!ignore_content && !time->ab_alt_mon_defined)
> + {
> + memcpy (time->ab_alt_mon, time->abmon, sizeof (time->abmon));
> + memcpy (time->wab_alt_mon, time->wabmon, sizeof (time->wabmon));
> + time->ab_alt_mon_defined = 1;
OK. Good, copy the abmon/wabmon versions.
> + }
> return;
>
> default:
> diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf
> index dad7f21..6bf2f60 100644
> --- a/locale/programs/locfile-kw.gperf
> +++ b/locale/programs/locfile-kw.gperf
> @@ -149,6 +149,7 @@ cal_direction, tok_cal_direction, 0
> timezone, tok_timezone, 0
> date_fmt, tok_date_fmt, 0
> alt_mon, tok_alt_mon, 0
> +ab_alt_mon, tok_ab_alt_mon, 0
> LC_MESSAGES, tok_lc_messages, 0
> yesexpr, tok_yesexpr, 0
> noexpr, tok_noexpr, 0
> diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h
> index d49da5e..e3cd18e 100644
> --- a/locale/programs/locfile-token.h
> +++ b/locale/programs/locfile-token.h
> @@ -187,6 +187,7 @@ enum token_t
> tok_timezone,
> tok_date_fmt,
> tok_alt_mon,
> + tok_ab_alt_mon,
OK. New token.
> tok_lc_messages,
> tok_yesexpr,
> tok_noexpr,
> diff --git a/time/Makefile b/time/Makefile
> index 91adcd0..4e631a1 100644
> --- a/time/Makefile
> +++ b/time/Makefile
> @@ -48,7 +48,8 @@ tests := test_time clocktest tst-posixtz tst-strptime
> tst_wcsftime \
> include ../Rules
>
> ifeq ($(run-built-tests),yes)
> -LOCALES := de_DE.ISO-8859-1 en_US.ISO-8859-1 ja_JP.EUC-JP pl_PL.UTF-8
> +LOCALES := de_DE.ISO-8859-1 en_US.ISO-8859-1 ja_JP.EUC-JP pl_PL.UTF-8 \
> + ru_RU.UTF-8
OK.
> include ../gen-locales.mk
>
> $(objpfx)tst-ftime_l.out: $(gen-locales)
> diff --git a/time/strftime_l.c b/time/strftime_l.c
> index ac5d28f..c71f9f4 100644
> --- a/time/strftime_l.c
> +++ b/time/strftime_l.c
> @@ -106,6 +106,7 @@ extern char *tzname[];
> # define UCHAR_T unsigned char
> # define L_(Str) Str
> # define NLW(Sym) Sym
> +# define ABALTMON_1 _NL_ABALTMON_1
OK.
>
> # if !defined STDC_HEADERS && !defined HAVE_MEMCPY
> # define MEMCPY(d, s, n) bcopy ((s), (d), (n))
> @@ -492,6 +493,9 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T
> *format,
> # define f_month \
> ((const CHAR_T *) (tp->tm_mon < 0 || tp->tm_mon > 11 \
> ? "?" : _NL_CURRENT (LC_TIME, NLW(MON_1) + tp->tm_mon)))
> +# define a_altmonth \
> + ((const CHAR_T *) (tp->tm_mon < 0 || tp->tm_mon > 11 \
> + ? "?" : _NL_CURRENT (LC_TIME, NLW(ABALTMON_1) + tp->tm_mon)))
OK.
> # define f_altmonth \
> ((const CHAR_T *) (tp->tm_mon < 0 || tp->tm_mon > 11 \
> ? "?" : _NL_CURRENT (LC_TIME, NLW(ALTMON_1) + tp->tm_mon)))
> @@ -501,6 +505,7 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T
> *format,
>
> # define aw_len STRLEN (a_wkday)
> # define am_len STRLEN (a_month)
> +# define aam_len STRLEN (a_altmonth)
> # define ap_len STRLEN (ampm)
> #else
> # if !HAVE_STRFTIME
> @@ -510,11 +515,13 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const
> CHAR_T *format,
> ? "?" : month_name[tp->tm_mon])
> # define a_wkday f_wkday
> # define a_month f_month
> +# define a_altmonth a_month
> # define f_altmonth f_month
> # define ampm (L_("AMPM") + 2 * (tp->tm_hour > 11))
>
> size_t aw_len = 3;
> size_t am_len = 3;
> + size_t aam_len = 3;
> size_t ap_len = 2;
> # endif
> #endif
> @@ -779,10 +786,13 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const
> CHAR_T *format,
> to_uppcase = 1;
> to_lowcase = 0;
> }
> - if (modifier != 0)
> + if (modifier == L_('E'))
OK.
> goto bad_format;
> #if defined _NL_CURRENT || !HAVE_STRFTIME
> - cpy (am_len, a_month);
> + if (modifier == L_('O'))
> + cpy (aam_len, a_altmonth);
> + else
> + cpy (am_len, a_month);
OK.
> break;
> #else
> goto underlying_strftime;
> diff --git a/time/strptime_l.c b/time/strptime_l.c
> index 39cf38d..cd901c2 100644
> --- a/time/strptime_l.c
> +++ b/time/strptime_l.c
> @@ -126,6 +126,8 @@ extern const struct __locale_data _nl_C_LC_TIME
> attribute_hidden;
> # define ab_month_name (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (ABMON_1)].string)
> # define alt_month_name \
> (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (ALTMON_1)].string)
> +# define ab_alt_month_name \
> + (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (_NL_ABALTMON_1)].string)
OK.
> # define HERE_D_T_FMT (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (D_T_FMT)].string)
> # define HERE_D_FMT (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (D_FMT)].string)
> # define HERE_AM_STR (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (AM_STR)].string)
> @@ -437,6 +439,18 @@ __strptime_internal (const char *rp, const char *fmt,
> struct tm *tmp,
> alt_month_name[cnt]))
> decided_longest = loc;
> }
> + trp = rp;
> + if (match_string (_NL_CURRENT (LC_TIME, _NL_ABALTMON_1 + cnt),
> + trp)
> + && trp > rp_longest)
> + {
> + rp_longest = trp;
> + cnt_longest = cnt;
> + if (s.decided == not
> + && strcmp (_NL_CURRENT (LC_TIME, _NL_ABALTMON_1 + cnt),
> + alt_month_name[cnt]))
> + decided_longest = loc;
OK.
> + }
> #endif
> }
> #endif
> @@ -448,6 +462,8 @@ __strptime_internal (const char *rp, const char *fmt, struct
> tm *tmp,
> #ifdef _LIBC
> || ((trp = rp, match_string (alt_month_name[cnt], trp))
> && trp > rp_longest)
> + || ((trp = rp, match_string (ab_alt_month_name[cnt], trp))
> + && trp > rp_longest)
OK.
> #endif
> ))
> {
> @@ -1035,7 +1051,9 @@ __strptime_internal (const char *rp, const char *fmt,
> struct tm *tmp,
> case 'O':
> switch (*fmt++)
> {
> + case 'b':
> case 'B':
> + case 'h':
OK.
> /* Match month name. Reprocess as plain 'B'. */
> fmt--;
> goto start_over;
> diff --git a/time/tst-strptime.c b/time/tst-strptime.c
> index bbc1390..ab09f0f 100644
> --- a/time/tst-strptime.c
> +++ b/time/tst-strptime.c
> @@ -24,6 +24,11 @@
> #include <time.h>
>
>
> +/* Some Cyrillic letters in UTF-8. */
> +#define CYR_n "\xd0\xbd"
> +#define CYR_o "\xd0\xbe"
> +#define CYR_ya "\xd1\x8f"
Please encode the UTF-8 directly into the test case.
Developers have to use UTF-8-capable editors and fonts.
> +
> static const struct
> {
> const char *locale;
> @@ -57,6 +62,14 @@ static const struct
> { "pl_PL.UTF-8", "23 listopad 2017", "%d %B %Y", 4, 326, 10, 23 },
> /* The nominative case is incorrect here but it is parseable. */
> { "pl_PL.UTF-8", "24 listopad 2017", "%d %OB %Y", 5, 327, 10, 24 },
> + { "pl_PL.UTF-8", "25 lis 2017", "%d %Ob %Y", 6, 328, 10, 25 },
> + { "ru_RU.UTF-8", "26 " CYR_n CYR_o CYR_ya " 2017", "%d %b %Y",
> + 0, 329, 10, 26 },
OK.
> + /* TODO: Add an example of "may"/"maya" (5th month, May) using %Ob in
> + Russian when the localedata is updated. Without the genitive forms
> + in localedata the word "maya" is ambiguous and may be mistaken for
> + "mart" (March).
> + */
> };
>
>
>
--
Cheers,
Carlos.