[PATCH] setlocale: New implementation
Jeff Johnston
jjohnstn@redhat.com
Thu Mar 5 18:03:00 GMT 2009
Assuming you have done some Cygwin testing with the code, please check
it in.
-- Jeff J.
Corinna Vinschen wrote:
> Hi,
>
> as promised in http://sourceware.org/ml/newlib/2009/msg00194.html here's
> my new setlocale implementation. It's now based on the FreeBSD
> implementation.
>
> The handling of the locale string is encapsulated in its own function
> called loadlocale. In the FreeBSD original, this function is the one which
> actually loads the locale information from the locale-specific file.
> This adaptation for newlib only checks the incoming string for correctness
> and stores the information. External functions formerly accessing
> __lc_ctype are changed to use the function call __locale_charset. I
> renamed the former __locale_charset function to __locale_msgcharset.
> This seemed more logical to me. This implementation of loadlocale now
> accepts all incoming locale strings of the style
>
> "C" or "POSIX",
>
> language[_territory][.charset][@modifier]
> with language being two lowercase ASCII letters,
> territory being two uppercase ASCII letters,
> charset being one of UTF-8, JIS, SJIS, EUCJP, ISO-8859-[1..15]
>
> "C-UTF-8", "C-JIS", "C-SJIS", "C-EUCJP", "C-ISO-8859-[1..15]"
> for backward compatibility.
>
> So far only the charset information is used in other parts of newlib.
>
> The idea behind using the FreeBSD function is that subsequent work can
> support real locale information from locale files by just changing the
> loadlocale() function on a per-target basis. I'm planning to do that for
> Cygwin in the long run to support real locales. Help would be greatly
> appreciated.
>
>
> Corinna
>
>
> * libc/locale/locale.c (_setlocale_r): New implementation
> based on FreeBSD's setlocale.
> (currentlocale): New helper function.
> (loadlocale): Ditto.
> (__locale_charset): New function.
> (__locale_msgcharset): Rename from __locale_charset.
> * libc/ctype/local.h (__lc_ctype): Remove declaration.
> (__locale_charset): Declare.
> * libc/ctype/iswalpha.c (iswalpha): Call __locale_charset instead
> of using __lc_ctype directly. Only compare against the charset
> alone.
> * libc/ctype/iswblank.c (iswblank): Ditto.
> * libc/ctype/iswcntrl.c (iswcntrl): Ditto.
> * libc/ctype/iswprint.c (iswprint): Ditto.
> * libc/ctype/iswpunct.c (iswpunct): Ditto.
> * libc/ctype/iswspace.c (iswspace): Ditto.
> * libc/ctype/towlower.c (towlower): Ditto.
> * libc/ctype/towupper.c (towupper): Ditto.
> * libc/stdlib/mbtowc_r.c (_mbtowc_r): Ditto.
> * libc/stdlib/wctomb_r.c (_wctomb_r): Ditto.
> * libc/sys/linux/intl/loadmsgcat.c (_nl_init_domain_conv): Call
> __locale_msgcharset instead of __locale_charset.
>
>
> Index: libc/ctype/iswalpha.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswalpha.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswalpha.c
> --- libc/ctype/iswalpha.c 28 Oct 2005 21:33:22 -0000 1.4
> +++ libc/ctype/iswalpha.c 27 Feb 2009 10:40:19 -0000
> @@ -69,29 +69,25 @@ No supporting OS subroutines are require
> int
> _DEFUN(iswalpha,(c), wint_t c)
> {
> - int unicode = 0;
> - if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> - {
> - unicode = 0;
> - /* fall-through */
> - }
> #ifdef _MB_CAPABLE
> - else if (!strcmp (__lc_ctype, "C-JIS"))
> + int unicode = 0;
> +
> + if (!strcmp (__locale_charset (), "JIS"))
> {
> c = __jp2uc (c, JP_JIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-SJIS"))
> + else if (!strcmp (__locale_charset (), "SJIS"))
> {
> c = __jp2uc (c, JP_SJIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-EUCJP"))
> + else if (!strcmp (__locale_charset (), "EUCJP"))
> {
> c = __jp2uc (c, JP_EUCJP);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-UTF-8"))
> + else if (!strcmp (__locale_charset (), "UTF-8"))
> {
> unicode = 1;
> }
> Index: libc/ctype/iswblank.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswblank.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswblank.c
> --- libc/ctype/iswblank.c 28 Oct 2005 21:33:22 -0000 1.4
> +++ libc/ctype/iswblank.c 27 Feb 2009 10:40:19 -0000
> @@ -65,29 +65,25 @@ No supporting OS subroutines are require
> int
> _DEFUN(iswblank,(c), wint_t c)
> {
> - int unicode = 0;
> - if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> - {
> - unicode = 0;
> - /* fall-through */
> - }
> #ifdef _MB_CAPABLE
> - else if (!strcmp (__lc_ctype, "C-JIS"))
> + int unicode = 0;
> +
> + if (!strcmp (__locale_charset (), "JIS"))
> {
> c = __jp2uc (c, JP_JIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-SJIS"))
> + else if (!strcmp (__locale_charset (), "SJIS"))
> {
> c = __jp2uc (c, JP_SJIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-EUCJP"))
> + else if (!strcmp (__locale_charset (), "EUCJP"))
> {
> c = __jp2uc (c, JP_EUCJP);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-UTF-8"))
> + else if (!strcmp (__locale_charset (), "UTF-8"))
> {
> unicode = 1;
> }
> Index: libc/ctype/iswcntrl.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswcntrl.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswcntrl.c
> --- libc/ctype/iswcntrl.c 28 Oct 2005 21:33:22 -0000 1.4
> +++ libc/ctype/iswcntrl.c 27 Feb 2009 10:40:19 -0000
> @@ -65,29 +65,25 @@ No supporting OS subroutines are require
> int
> _DEFUN(iswcntrl,(c), wint_t c)
> {
> - int unicode = 0;
> - if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> - {
> - unicode = 0;
> - /* fall-through */
> - }
> #ifdef _MB_CAPABLE
> - else if (!strcmp (__lc_ctype, "C-JIS"))
> + int unicode = 0;
> +
> + if (!strcmp (__locale_charset (), "JIS"))
> {
> c = __jp2uc (c, JP_JIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-SJIS"))
> + else if (!strcmp (__locale_charset (), "SJIS"))
> {
> c = __jp2uc (c, JP_SJIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-EUCJP"))
> + else if (!strcmp (__locale_charset (), "EUCJP"))
> {
> c = __jp2uc (c, JP_EUCJP);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-UTF-8"))
> + else if (!strcmp (__locale_charset (), "UTF-8"))
> {
> unicode = 1;
> }
> Index: libc/ctype/iswprint.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswprint.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswprint.c
> --- libc/ctype/iswprint.c 28 Oct 2005 21:33:22 -0000 1.4
> +++ libc/ctype/iswprint.c 27 Feb 2009 10:40:19 -0000
> @@ -69,29 +69,25 @@ No supporting OS subroutines are require
> int
> _DEFUN(iswprint,(c), wint_t c)
> {
> - int unicode = 0;
> - if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> - {
> - unicode = 0;
> - /* fall-through */
> - }
> #ifdef _MB_CAPABLE
> - else if (!strcmp (__lc_ctype, "C-JIS"))
> + int unicode = 0;
> +
> + if (!strcmp (__locale_charset (), "JIS"))
> {
> c = __jp2uc (c, JP_JIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-SJIS"))
> + else if (!strcmp (__locale_charset (), "SJIS"))
> {
> c = __jp2uc (c, JP_SJIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-EUCJP"))
> + else if (!strcmp (__locale_charset (), "EUCJP"))
> {
> c = __jp2uc (c, JP_EUCJP);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-UTF-8"))
> + else if (!strcmp (__locale_charset (), "UTF-8"))
> {
> unicode = 1;
> }
> Index: libc/ctype/iswpunct.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswpunct.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswpunct.c
> --- libc/ctype/iswpunct.c 28 Oct 2005 21:33:22 -0000 1.4
> +++ libc/ctype/iswpunct.c 27 Feb 2009 10:40:19 -0000
> @@ -69,29 +69,25 @@ No supporting OS subroutines are require
> int
> _DEFUN(iswpunct,(c), wint_t c)
> {
> - int unicode = 0;
> - if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> - {
> - unicode = 0;
> - /* fall-through */
> - }
> #ifdef _MB_CAPABLE
> - else if (!strcmp (__lc_ctype, "C-JIS"))
> + int unicode = 0;
> +
> + if (!strcmp (__locale_charset (), "JIS"))
> {
> c = __jp2uc (c, JP_JIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-SJIS"))
> + else if (!strcmp (__locale_charset (), "SJIS"))
> {
> c = __jp2uc (c, JP_SJIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-EUCJP"))
> + else if (!strcmp (__locale_charset (), "EUCJP"))
> {
> c = __jp2uc (c, JP_EUCJP);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-UTF-8"))
> + else if (!strcmp (__locale_charset (), "UTF-8"))
> {
> unicode = 1;
> }
> Index: libc/ctype/iswspace.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswspace.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswspace.c
> --- libc/ctype/iswspace.c 28 Oct 2005 21:33:22 -0000 1.4
> +++ libc/ctype/iswspace.c 27 Feb 2009 10:40:19 -0000
> @@ -65,29 +65,25 @@ No supporting OS subroutines are require
> int
> _DEFUN(iswspace,(c), wint_t c)
> {
> - int unicode = 0;
> - if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> - {
> - unicode = 0;
> - /* fall-through */
> - }
> #ifdef _MB_CAPABLE
> - else if (!strcmp (__lc_ctype, "C-JIS"))
> + int unicode = 0;
> +
> + if (!strcmp (__locale_charset (), "JIS"))
> {
> c = __jp2uc (c, JP_JIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-SJIS"))
> + else if (!strcmp (__locale_charset (), "SJIS"))
> {
> c = __jp2uc (c, JP_SJIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-EUCJP"))
> + else if (!strcmp (__locale_charset (), "EUCJP"))
> {
> c = __jp2uc (c, JP_EUCJP);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-UTF-8"))
> + else if (!strcmp (__locale_charset (), "UTF-8"))
> {
> unicode = 1;
> }
> Index: libc/ctype/local.h
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/local.h,v
> retrieving revision 1.1
> diff -u -p -r1.1 local.h
> --- libc/ctype/local.h 20 Sep 2002 20:13:10 -0000 1.1
> +++ libc/ctype/local.h 27 Feb 2009 10:40:19 -0000
> @@ -20,7 +20,7 @@
> #define WC_UPPER 11
> #define WC_XDIGIT 12
>
> -extern char __lc_ctype[12];
> +extern char *__locale_charset ();
>
> /* Japanese encoding types supported */
> #define JP_JIS 1
> Index: libc/ctype/towlower.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/towlower.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 towlower.c
> --- libc/ctype/towlower.c 28 Oct 2005 21:33:22 -0000 1.4
> +++ libc/ctype/towlower.c 27 Feb 2009 10:40:19 -0000
> @@ -69,30 +69,25 @@ No supporting OS subroutines are require
> wint_t
> _DEFUN(towlower,(c), wint_t c)
> {
> +#ifdef _MB_CAPABLE
> int unicode = 0;
>
> - if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> - {
> - unicode = 0;
> - /* fall-through */
> - }
> -#ifdef _MB_CAPABLE
> - else if (!strcmp (__lc_ctype, "C-JIS"))
> + if (!strcmp (__locale_charset (), "JIS"))
> {
> c = __jp2uc (c, JP_JIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-SJIS"))
> + else if (!strcmp (__locale_charset (), "SJIS"))
> {
> c = __jp2uc (c, JP_SJIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-EUCJP"))
> + else if (!strcmp (__locale_charset (), "EUCJP"))
> {
> c = __jp2uc (c, JP_EUCJP);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-UTF-8"))
> + else if (!strcmp (__locale_charset (), "UTF-8"))
> {
> unicode = 1;
> }
> Index: libc/ctype/towupper.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/towupper.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 towupper.c
> --- libc/ctype/towupper.c 28 Oct 2005 21:33:22 -0000 1.4
> +++ libc/ctype/towupper.c 27 Feb 2009 10:40:19 -0000
> @@ -69,30 +69,25 @@ No supporting OS subroutines are require
> wint_t
> _DEFUN(towupper,(c), wint_t c)
> {
> +#ifdef _MB_CAPABLE
> int unicode = 0;
>
> - if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> - {
> - unicode = 0;
> - /* fall-through */
> - }
> -#ifdef _MB_CAPABLE
> - else if (!strcmp (__lc_ctype, "C-JIS"))
> + if (!strcmp (__locale_charset (), "JIS"))
> {
> c = __jp2uc (c, JP_JIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-SJIS"))
> + else if (!strcmp (__locale_charset (), "SJIS"))
> {
> c = __jp2uc (c, JP_SJIS);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-EUCJP"))
> + else if (!strcmp (__locale_charset (), "EUCJP"))
> {
> c = __jp2uc (c, JP_EUCJP);
> unicode = 1;
> }
> - else if (!strcmp (__lc_ctype, "C-UTF-8"))
> + else if (!strcmp (__locale_charset (), "UTF-8"))
> {
> unicode = 1;
> }
> Index: libc/locale/locale.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/locale/locale.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 locale.c
> --- libc/locale/locale.c 23 Apr 2004 21:44:21 -0000 1.8
> +++ libc/locale/locale.c 27 Feb 2009 10:40:19 -0000
> @@ -42,13 +42,16 @@ execution environment for international
> information; <<localeconv>> reports on the settings of the current
> locale.
>
> -This is a minimal implementation, supporting only the required <<"C">>
> -value for <[locale]>; strings representing other locales are not
> -honored unless _MB_CAPABLE is defined in which case three new
> -extensions are allowed for LC_CTYPE or LC_MESSAGES only: <<"C-JIS">>,
> -<<"C-EUCJP">>, <<"C-SJIS">>, or <<"C-ISO-8859-1">>. (<<"">> is
> -also accepted; it represents the default locale
> -for an implementation, here equivalent to <<"C">>.)
> +This is a minimal implementation, supporting only the required <<"POSIX">>
> +and <<"C">> values for <[locale]>; strings representing other locales are not
> +honored unless _MB_CAPABLE is defined in which case POSIX locale strings
> +are allowed, plus five extensions supported for backward compatibility with
> +older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>,
> +<<"C-SJIS">>, or <<"C-ISO-8859-x">> with 1 <= x <= 15. Even when using
> +POSIX locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>,
> +<<"EUCJP">>, <<"SJIS">>, or <<"ISO-8859-x">> with 1 <= x <= 15. (<<"">> is
> +also accepted; if given, the settings are read from the corresponding
> +LC_* environment variables and $LANG according to POSIX rules.)
>
> If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns
> a pointer to the string representing the current locale (always
> @@ -66,9 +69,13 @@ in effect.
> <[reent]> is a pointer to a reentrancy structure.
>
> RETURNS
> -<<setlocale>> returns either a pointer to a string naming the locale
> -currently in effect (always <<"C">> for this implementation, or, if
> -the locale request cannot be honored, <<NULL>>.
> +A successful call to <<setlocale>> returns a pointer to a string
> +associated with the specified category for the new locale. The string
> +returned by <<setlocale>> is such that a subsequent call using that
> +string will restore that category (or all categories in case of LC_ALL),
> +to that state. The application shall not modify the string returned
> +which may be overwritten by a subsequent call to <<setlocale>>.
> +On error, <<setlocale>> returns <<NULL>>.
>
> <<localeconv>> returns a pointer to a structure of type <<lconv>>,
> which describes the formatting and collating conventions in effect (in
> @@ -81,16 +88,50 @@ implementations is the C locale.
> No supporting OS subroutines are required.
> */
>
> +/* Parts of this code are originally taken from FreeBSD. */
> /*
> - * setlocale, localeconv : internationalize your locale.
> - * (Only "C" or null supported).
> + * Copyright (c) 1996 - 2002 FreeBSD Project
> + * Copyright (c) 1991, 1993
> + * The Regents of the University of California. All rights reserved.
> + *
> + * This code is derived from software contributed to Berkeley by
> + * Paul Borman at Krystal Technologies.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + * 4. Neither the name of the University nor the names of its contributors
> + * may be used to endorse or promote products derived from this software
> + * without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> */
>
> #include <newlib.h>
> +#include <errno.h>
> #include <locale.h>
> #include <string.h>
> #include <limits.h>
> #include <reent.h>
> +#include <stdlib.h>
> +
> +#define _LC_LAST 7
> +#define ENCODING_LEN 31
>
> #ifdef __CYGWIN__
> int __declspec(dllexport) __mb_cur_max = 1;
> @@ -109,11 +150,48 @@ static _CONST struct lconv lconv =
> CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
> };
>
> +#ifdef _MB_CAPABLE
> +/*
> + * Category names for getenv()
> + */
> +static char *categories[_LC_LAST] = {
> + "LC_ALL",
> + "LC_COLLATE",
> + "LC_CTYPE",
> + "LC_MONETARY",
> + "LC_NUMERIC",
> + "LC_TIME",
> + "LC_MESSAGES",
> +};
>
> -char * _EXFUN(__locale_charset,(_VOID));
> +/*
> + * Current locales for each category
> + */
> +static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
> + "C",
> + "C",
> + "C",
> + "C",
> + "C",
> + "C",
> + "C",
> +};
>
> -static char *charset = "ISO-8859-1";
> -char __lc_ctype[12] = "C";
> +/*
> + * The locales we are going to try and load
> + */
> +static char new_categories[_LC_LAST][ENCODING_LEN + 1];
> +static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
> +
> +static char current_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];
> +static char *currentlocale(void);
> +static char *loadlocale(struct _reent *, int);
> +static const char *__get_locale_env(struct _reent *, int);
> +
> +#endif
> +
> +static char lc_ctype_charset[ENCODING_LEN + 1] = "ISO-8859-1";
> +static char lc_message_charset[ENCODING_LEN + 1] = "ISO-8859-1";
>
> char *
> _DEFUN(_setlocale_r, (p, category, locale),
> @@ -124,154 +202,303 @@ _DEFUN(_setlocale_r, (p, category, local
> #ifndef _MB_CAPABLE
> if (locale)
> {
> - if (strcmp (locale, "C") && strcmp (locale, ""))
> - return 0;
> + if (strcmp (locale, "POSIX") && strcmp (locale, "C")
> + && strcmp (locale, ""))
> + return NULL;
> p->_current_category = category;
> p->_current_locale = locale;
> }
> return "C";
> #else
> - static char last_lc_ctype[12] = "C";
> - static char lc_messages[12] = "C";
> - static char last_lc_messages[12] = "C";
> + int i, j, len, saverr;
> + const char *env, *r;
>
> - if (locale)
> + if (category < LC_ALL || category >= _LC_LAST)
> {
> - char *locale_name = (char *)locale;
> - if (category != LC_CTYPE && category != LC_MESSAGES)
> - {
> - if (strcmp (locale, "C") && strcmp (locale, ""))
> - return 0;
> - if (category == LC_ALL)
> - {
> - strcpy (last_lc_ctype, __lc_ctype);
> - strcpy (__lc_ctype, "C");
> - strcpy (last_lc_messages, lc_messages);
> - strcpy (lc_messages, "C");
> - __mb_cur_max = 1;
> - }
> - }
> - else
> - {
> - if (locale[0] == 'C' && locale[1] == '-')
> - {
> - switch (locale[2])
> - {
> - case 'U':
> - if (strcmp (locale, "C-UTF-8"))
> - return 0;
> - break;
> - case 'J':
> - if (strcmp (locale, "C-JIS"))
> - return 0;
> - break;
> - case 'E':
> - if (strcmp (locale, "C-EUCJP"))
> - return 0;
> - break;
> - case 'S':
> - if (strcmp (locale, "C-SJIS"))
> - return 0;
> - break;
> - case 'I':
> - if (strcmp (locale, "C-ISO-8859-1"))
> - return 0;
> - break;
> - default:
> - return 0;
> - }
> - }
> - else
> - {
> - if (strcmp (locale, "C") && strcmp (locale, ""))
> - return 0;
> - locale_name = "C"; /* C is always the default locale */
> - }
> -
> - if (category == LC_CTYPE)
> - {
> - strcpy (last_lc_ctype, __lc_ctype);
> - strcpy (__lc_ctype, locale_name);
> -
> - __mb_cur_max = 1;
> - if (locale[1] == '-')
> - {
> - switch (locale[2])
> - {
> - case 'U':
> - __mb_cur_max = 6;
> - break;
> - case 'J':
> - __mb_cur_max = 8;
> - break;
> - case 'E':
> - __mb_cur_max = 2;
> - break;
> - case 'S':
> - __mb_cur_max = 2;
> - break;
> - case 'I':
> - default:
> - __mb_cur_max = 1;
> - }
> - }
> - }
> - else
> - {
> - strcpy (last_lc_messages, lc_messages);
> - strcpy (lc_messages, locale_name);
> -
> - charset = "ISO-8859-1";
> - if (locale[1] == '-')
> - {
> - switch (locale[2])
> - {
> - case 'U':
> - charset = "UTF-8";
> - break;
> - case 'J':
> - charset = "JIS";
> - break;
> - case 'E':
> - charset = "EUCJP";
> - break;
> - case 'S':
> - charset = "SJIS";
> - break;
> - case 'I':
> - charset = "ISO-8859-1";
> - break;
> - default:
> - return 0;
> - }
> - }
> - }
> - }
> - p->_current_category = category;
> - p->_current_locale = locale;
> + p->_errno = EINVAL;
> + return NULL;
> + }
> +
> + if (locale == NULL)
> + return category != LC_ALL ? current_categories[category] : currentlocale();
>
> - if (category == LC_CTYPE)
> - return last_lc_ctype;
> - else if (category == LC_MESSAGES)
> - return last_lc_messages;
> + /*
> + * Default to the current locale for everything.
> + */
> + for (i = 1; i < _LC_LAST; ++i)
> + strcpy (new_categories[i], current_categories[i]);
> +
> + /*
> + * Now go fill up new_categories from the locale argument
> + */
> + if (!*locale)
> + {
> + if (category == LC_ALL)
> + {
> + for (i = 1; i < _LC_LAST; ++i)
> + {
> + env = __get_locale_env (p, i);
> + if (strlen (env) > ENCODING_LEN)
> + {
> + p->_errno = EINVAL;
> + return NULL;
> + }
> + strcpy (new_categories[i], env);
> + }
> + }
> + else
> + {
> + env = __get_locale_env (p, category);
> + if (strlen (env) > ENCODING_LEN)
> + {
> + p->_errno = EINVAL;
> + return NULL;
> + }
> + strcpy (new_categories[category], env);
> + }
> + }
> + else if (category != LC_ALL)
> + {
> + if (strlen (locale) > ENCODING_LEN)
> + {
> + p->_errno = EINVAL;
> + return NULL;
> + }
> + strcpy (new_categories[category], locale);
> }
> else
> {
> - if (category == LC_CTYPE)
> - return __lc_ctype;
> - else if (category == LC_MESSAGES)
> - return lc_messages;
> + if ((r = strchr (locale, '/')) == NULL)
> + {
> + if (strlen (locale) > ENCODING_LEN)
> + {
> + p->_errno = EINVAL;
> + return NULL;
> + }
> + for (i = 1; i < _LC_LAST; ++i)
> + strcpy (new_categories[i], locale);
> + }
> + else
> + {
> + for (i = 1; r[1] == '/'; ++r)
> + ;
> + if (!r[1])
> + {
> + p->_errno = EINVAL;
> + return NULL; /* Hmm, just slashes... */
> + }
> + do
> + {
> + if (i == _LC_LAST)
> + break; /* Too many slashes... */
> + if ((len = r - locale) > ENCODING_LEN)
> + {
> + p->_errno = EINVAL;
> + return NULL;
> + }
> + strlcpy (new_categories[i], locale, len + 1);
> + i++;
> + while (*r == '/')
> + r++;
> + locale = r;
> + while (*r && *r != '/')
> + r++;
> + }
> + while (*locale);
> + while (i < _LC_LAST)
> + {
> + strcpy (new_categories[i], new_categories[i-1]);
> + i++;
> + }
> + }
> }
> -
> - return "C";
> +
> + if (category != LC_ALL)
> + return loadlocale (p, category);
> +
> + for (i = 1; i < _LC_LAST; ++i)
> + {
> + strcpy (saved_categories[i], current_categories[i]);
> + if (loadlocale (p, i) == NULL)
> + {
> + saverr = p->_errno;
> + for (j = 1; j < i; j++)
> + {
> + strcpy (new_categories[j], saved_categories[j]);
> + if (loadlocale (p, j) == NULL)
> + {
> + strcpy (new_categories[j], "C");
> + loadlocale (p, j);
> + }
> + }
> + p->_errno = saverr;
> + return NULL;
> + }
> + }
> + return currentlocale ();
> #endif
> +}
> +
> +#ifdef _MB_CAPABLE
> +static char *
> +currentlocale()
> +{
> + int i;
> +
> + (void)strcpy(current_locale_string, current_categories[1]);
> +
> + for (i = 2; i < _LC_LAST; ++i)
> + if (strcmp(current_categories[1], current_categories[i])) {
> + for (i = 2; i < _LC_LAST; ++i) {
> + (void)strcat(current_locale_string, "/");
> + (void)strcat(current_locale_string,
> + current_categories[i]);
> + }
> + break;
> + }
> + return (current_locale_string);
> +}
> +#endif
> +
> +#ifdef _MB_CAPABLE
> +static char *
> +loadlocale(struct _reent *p, int category)
> +{
> + /* At this point a full-featured system would just load the locale
> + specific data from the locale files.
> + What we do here for now is to check the incoming string for correctness.
> + The string must be one of the allowed locale strings, either
> + one in POSIX-style, or one in the old newlib style to maintain
> + backward compatibility. If the locale string is correct, the charset
> + is extracted and stored in lc_ctype_charset or lc_message_charset
> + depending on the category. */
> + char *locale = new_categories[category];
> + char charset[ENCODING_LEN + 1];
> + unsigned long val;
> + char *end;
> + int mbc_max;
>
> + /* "POSIX" is translated to "C", as on Linux. */
> + if (!strcmp (locale, "POSIX"))
> + strcpy (locale, "C");
> + if (!strcmp (locale, "C")) /* Default "C" locale */
> + strcpy (charset, "ISO-8859-1");
> + else if (locale[0] == 'C' && locale[1] == '-') /* Old newlib style */
> + strcpy (charset, locale + 2);
> + else /* POSIX style */
> + {
> + char *c = locale;
> +
> + /* Don't use ctype macros here, they might be localized. */
> + /* Language */
> + if (c[0] <= 'a' || c[0] >= 'z'
> + || c[1] <= 'a' || c[1] >= 'z')
> + return NULL;
> + c += 2;
> + if (c[0] == '_')
> + {
> + /* Territory */
> + ++c;
> + if (c[0] <= 'A' || c[0] >= 'Z'
> + || c[1] <= 'A' || c[1] >= 'Z')
> + return NULL;
> + c += 2;
> + }
> + if (c[0] == '.')
> + {
> + /* Charset */
> + strcpy (charset, c + 1);
> + if ((c = strchr (charset, '@')))
> + /* Strip off modifier */
> + *c = '\0';
> + }
> + else if (c[0] == '\0' || c[0] == '@')
> + /* End of string or just a modifier */
> + strcpy (charset, "ISO-8859-1");
> + else
> + /* Invalid string */
> + return NULL;
> + }
> + /* We only support this subset of charsets. */
> + switch (charset[0])
> + {
> + case 'U':
> + if (strcmp (charset, "UTF-8"))
> + return NULL;
> + mbc_max = 6;
> + break;
> + case 'J':
> + if (strcmp (charset, "JIS"))
> + return NULL;
> + mbc_max = 8;
> + break;
> + case 'E':
> + if (strcmp (charset, "EUCJP"))
> + return NULL;
> + mbc_max = 2;
> + break;
> + case 'S':
> + if (strcmp (charset, "SJIS"))
> + return NULL;
> + mbc_max = 2;
> + break;
> + case 'I':
> + default:
> + /* Must be exactly one of ISO-8859-1, [...] ISO-8859-15. */
> + if (strncmp (charset, "ISO-8859-", 9))
> + return NULL;
> + val = strtol (charset + 9, &end, 10);
> + if (val < 1 || val > 15 || *end)
> + return NULL;
> + mbc_max = 1;
> + break;
> + }
> + if (category == LC_CTYPE)
> + {
> + strcpy (lc_ctype_charset, charset);
> + __mb_cur_max = mbc_max;
> + }
> + else if (category == LC_MESSAGES)
> + strcpy (lc_message_charset, charset);
> + p->_current_category = category;
> + p->_current_locale = locale;
> + return strcpy(current_categories[category], new_categories[category]);
> +}
> +
> +static const char *
> +__get_locale_env(struct _reent *p, int category)
> +{
> + const char *env;
> +
> + /* 1. check LC_ALL. */
> + env = _getenv_r (p, categories[0]);
> +
> + /* 2. check LC_* */
> + if (env == NULL || !*env)
> + env = _getenv_r (p, categories[category]);
> +
> + /* 3. check LANG */
> + if (env == NULL || !*env)
> + env = _getenv_r (p, "LANG");
> +
> + /* 4. if none is set, fall to "C" */
> + if (env == NULL || !*env)
> + env = "C";
> +
> + return env;
> }
> +#endif
>
> char *
> _DEFUN_VOID(__locale_charset)
> {
> - return charset;
> + return lc_ctype_charset;
> +}
> +
> +char *
> +_DEFUN_VOID(__locale_msgcharset)
> +{
> + return lc_message_charset;
> }
>
> struct lconv *
> Index: libc/stdlib/mbtowc_r.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/stdlib/mbtowc_r.c,v
> retrieving revision 1.9
> diff -u -p -r1.9 mbtowc_r.c
> --- libc/stdlib/mbtowc_r.c 25 Feb 2009 09:10:09 -0000 1.9
> +++ libc/stdlib/mbtowc_r.c 27 Feb 2009 10:40:20 -0000
> @@ -45,8 +45,6 @@ static JIS_ACTION JIS_action_table[JIS_S
> /* we override the mbstate_t __count field for more complex encodings and use it store a state value */
> #define __state __count
>
> -extern char __lc_ctype[12];
> -
> int
> _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
> struct _reent *r _AND
> @@ -65,9 +63,9 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
> return -2;
>
> #ifdef _MB_CAPABLE
> - if (strlen (__lc_ctype) <= 1)
> + if (strlen (__locale_charset ()) <= 1)
> { /* fall-through */ }
> - else if (!strcmp (__lc_ctype, "C-UTF-8"))
> + else if (!strcmp (__locale_charset (), "UTF-8"))
> {
> int ch;
> int i = 0;
> @@ -221,7 +219,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
> else
> return -1;
> }
> - else if (!strcmp (__lc_ctype, "C-SJIS"))
> + else if (!strcmp (__locale_charset (), "SJIS"))
> {
> int ch;
> int i = 0;
> @@ -251,7 +249,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
> return -1;
> }
> }
> - else if (!strcmp (__lc_ctype, "C-EUCJP"))
> + else if (!strcmp (__locale_charset (), "EUCJP"))
> {
> int ch;
> int i = 0;
> @@ -281,7 +279,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
> return -1;
> }
> }
> - else if (!strcmp (__lc_ctype, "C-JIS"))
> + else if (!strcmp (__locale_charset (), "JIS"))
> {
> JIS_STATE curr_state;
> JIS_ACTION action;
> Index: libc/stdlib/wctomb_r.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/stdlib/wctomb_r.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 wctomb_r.c
> --- libc/stdlib/wctomb_r.c 25 Feb 2009 09:10:09 -0000 1.8
> +++ libc/stdlib/wctomb_r.c 27 Feb 2009 10:40:20 -0000
> @@ -7,8 +7,6 @@
> /* for some conversions, we use the __count field as a place to store a state value */
> #define __state __count
>
> -extern char __lc_ctype[12];
> -
> int
> _DEFUN (_wctomb_r, (r, s, wchar, state),
> struct _reent *r _AND
> @@ -21,9 +19,9 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
> is 4, as is the case on cygwin. */
> wint_t wchar = _wchar;
>
> - if (strlen (__lc_ctype) <= 1)
> + if (strlen (__locale_charset ()) <= 1)
> { /* fall-through */ }
> - else if (!strcmp (__lc_ctype, "C-UTF-8"))
> + else if (!strcmp (__locale_charset (), "UTF-8"))
> {
> if (s == NULL)
> return 0; /* UTF-8 encoding is not state-dependent */
> @@ -93,9 +91,9 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
> return 4;
> }
> else
> - return -1;
> + return -1;
> }
> - else if (!strcmp (__lc_ctype, "C-SJIS"))
> + else if (!strcmp (__locale_charset (), "SJIS"))
> {
> unsigned char char2 = (unsigned char)wchar;
> unsigned char char1 = (unsigned char)(wchar >> 8);
> @@ -113,10 +111,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
> return 2;
> }
> else
> - return -1;
> + return -1;
> }
> }
> - else if (!strcmp (__lc_ctype, "C-EUCJP"))
> + else if (!strcmp (__locale_charset (), "EUCJP"))
> {
> unsigned char char2 = (unsigned char)wchar;
> unsigned char char1 = (unsigned char)(wchar >> 8);
> @@ -134,10 +132,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
> return 2;
> }
> else
> - return -1;
> + return -1;
> }
> }
> - else if (!strcmp (__lc_ctype, "C-JIS"))
> + else if (!strcmp (__locale_charset (), "JIS"))
> {
> int cnt = 0;
> unsigned char char2 = (unsigned char)wchar;
> @@ -165,7 +163,7 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
> return cnt + 2;
> }
> else
> - return -1;
> + return -1;
> }
> else
> {
> Index: libc/sys/linux/intl/loadmsgcat.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/sys/linux/intl/loadmsgcat.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 loadmsgcat.c
> --- libc/sys/linux/intl/loadmsgcat.c 23 Apr 2004 21:44:22 -0000 1.2
> +++ libc/sys/linux/intl/loadmsgcat.c 27 Feb 2009 10:40:20 -0000
> @@ -266,8 +266,8 @@ _nl_init_domain_conv (domain_file, domai
> outcharset = (*_nl_current[LC_CTYPE])->values[_NL_ITEM_INDEX (CODESET)].string;
> # else
> # if HAVE_ICONV
> - extern const char *__locale_charset (void);
> - outcharset = __locale_charset ();
> + extern const char *__locale_msgcharset (void);
> + outcharset = __locale_msgcharset ();
> # endif
> # endif
> }
>
>
>
More information about the Newlib
mailing list