[PATCH] setlocale: New implementation

Jeff Johnston jjohnstn@redhat.com
Thu Mar 5 18:03:00 GMT 2009


Assuming you have done some Cygwin testing with the code, please check 
it in.

-- Jeff J.

Corinna Vinschen wrote:
> Hi,
>
> as promised in http://sourceware.org/ml/newlib/2009/msg00194.html here's
> my new setlocale implementation.  It's now based on the FreeBSD
> implementation.
>
> The handling of the locale string is encapsulated in its own function
> called loadlocale.  In the original FreeBSD code this is the function
> which actually loads the locale information from the locale-specific file.
> This adaptation for newlib only checks the incoming string for correctness
> and stores the information.  External functions formerly accessing
> __lc_ctype are changed to use the function call __locale_charset.  I
> renamed the former __locale_charset function to __locale_msgcharset.
> This seemed more logical to me.  This implementation of loadlocale now
> accepts all incoming locale strings of the style
>
>   "C" or "POSIX",
>
>   language[_territory][.charset][@modifier]
>     with language being two lowercase ASCII letters,
>     territory being two uppercase ASCII letters,
>     charset being one of UTF-8, JIS, SJIS, EUCJP, ISO-8859-[1..15]
>
>   "C-UTF-8", "C-JIS", "C-SJIS", "C-EUCJP", "C-ISO-8859-[1..15]"
>     for backward compatibility.
>
> So far only the charset information is used in other parts of newlib.
>
> The idea behind using the FreeBSD function is that later work can
> support real locale information from locale files by just changing the
> loadlocale() function on a per-target basis.  I'm planning to do that for
> Cygwin in the long run to support real locales.  Help would be greatly
> appreciated.
>
>
> Corinna
>
>
> 	* libc/locale/locale.c (_setlocale_r): New implementation
> 	based on FreeBSD's setlocale.
> 	(currentlocale): New helper function.
> 	(loadlocale): Ditto.
> 	(__locale_charset): New function.
> 	(__locale_msgcharset): Rename from __locale_charset.
> 	* libc/ctype/local.h (__lc_ctype): Remove declaration.
> 	(__locale_charset): Declare.
> 	* libc/ctype/iswalpha.c (iswalpha): Call __locale_charset instead
> 	of using __lc_ctype directly.  Only compare against the charset
> 	alone.
> 	* libc/ctype/iswblank.c (iswblank): Ditto.
> 	* libc/ctype/iswcntrl.c (iswcntrl): Ditto.
> 	* libc/ctype/iswprint.c (iswprint): Ditto.
> 	* libc/ctype/iswpunct.c (iswpunct): Ditto.
> 	* libc/ctype/iswspace.c (iswspace): Ditto.
> 	* libc/ctype/towlower.c (towlower): Ditto.
> 	* libc/ctype/towupper.c (towupper): Ditto.
> 	* libc/stdlib/mbtowc_r.c (_mbtowc_r): Ditto.
> 	* libc/stdlib/wctomb_r.c (_wctomb_r): Ditto.
> 	* libc/sys/linux/intl/loadmsgcat.c (_nl_init_domain_conv): Call
> 	__locale_msgcharset instead of __locale_charset.
>
>
> Index: libc/ctype/iswalpha.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswalpha.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswalpha.c
> --- libc/ctype/iswalpha.c	28 Oct 2005 21:33:22 -0000	1.4
> +++ libc/ctype/iswalpha.c	27 Feb 2009 10:40:19 -0000
> @@ -69,29 +69,25 @@ No supporting OS subroutines are require
>  int
>  _DEFUN(iswalpha,(c), wint_t c)
>  {
> -  int unicode = 0;
> -  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> -    {
> -      unicode = 0;
> -      /* fall-through */ 
> -    }
>  #ifdef _MB_CAPABLE
> -  else if (!strcmp (__lc_ctype, "C-JIS"))
> +  int unicode = 0;
> +
> +  if (!strcmp (__locale_charset (), "JIS"))
>      {
>        c = __jp2uc (c, JP_JIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-SJIS"))
> +  else if (!strcmp (__locale_charset (), "SJIS"))
>      {
>        c = __jp2uc (c, JP_SJIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-EUCJP"))
> +  else if (!strcmp (__locale_charset (), "EUCJP"))
>      {
>        c = __jp2uc (c, JP_EUCJP);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-UTF-8"))
> +  else if (!strcmp (__locale_charset (), "UTF-8"))
>      {
>        unicode = 1;
>      }
> Index: libc/ctype/iswblank.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswblank.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswblank.c
> --- libc/ctype/iswblank.c	28 Oct 2005 21:33:22 -0000	1.4
> +++ libc/ctype/iswblank.c	27 Feb 2009 10:40:19 -0000
> @@ -65,29 +65,25 @@ No supporting OS subroutines are require
>  int
>  _DEFUN(iswblank,(c), wint_t c)
>  {
> -  int unicode = 0;
> -  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> -    {
> -      unicode = 0;
> -      /* fall-through */ 
> -    }
>  #ifdef _MB_CAPABLE
> -  else if (!strcmp (__lc_ctype, "C-JIS"))
> +  int unicode = 0;
> +
> +  if (!strcmp (__locale_charset (), "JIS"))
>      {
>        c = __jp2uc (c, JP_JIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-SJIS"))
> +  else if (!strcmp (__locale_charset (), "SJIS"))
>      {
>        c = __jp2uc (c, JP_SJIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-EUCJP"))
> +  else if (!strcmp (__locale_charset (), "EUCJP"))
>      {
>        c = __jp2uc (c, JP_EUCJP);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-UTF-8"))
> +  else if (!strcmp (__locale_charset (), "UTF-8"))
>      {
>        unicode = 1;
>      }
> Index: libc/ctype/iswcntrl.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswcntrl.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswcntrl.c
> --- libc/ctype/iswcntrl.c	28 Oct 2005 21:33:22 -0000	1.4
> +++ libc/ctype/iswcntrl.c	27 Feb 2009 10:40:19 -0000
> @@ -65,29 +65,25 @@ No supporting OS subroutines are require
>  int
>  _DEFUN(iswcntrl,(c), wint_t c)
>  {
> -  int unicode = 0;
> -  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> -    {
> -      unicode = 0;
> -      /* fall-through */ 
> -    }
>  #ifdef _MB_CAPABLE
> -  else if (!strcmp (__lc_ctype, "C-JIS"))
> +  int unicode = 0;
> +
> +  if (!strcmp (__locale_charset (), "JIS"))
>      {
>        c = __jp2uc (c, JP_JIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-SJIS"))
> +  else if (!strcmp (__locale_charset (), "SJIS"))
>      {
>        c = __jp2uc (c, JP_SJIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-EUCJP"))
> +  else if (!strcmp (__locale_charset (), "EUCJP"))
>      {
>        c = __jp2uc (c, JP_EUCJP);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-UTF-8"))
> +  else if (!strcmp (__locale_charset (), "UTF-8"))
>      {
>        unicode = 1;
>      }
> Index: libc/ctype/iswprint.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswprint.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswprint.c
> --- libc/ctype/iswprint.c	28 Oct 2005 21:33:22 -0000	1.4
> +++ libc/ctype/iswprint.c	27 Feb 2009 10:40:19 -0000
> @@ -69,29 +69,25 @@ No supporting OS subroutines are require
>  int
>  _DEFUN(iswprint,(c), wint_t c)
>  {
> -  int unicode = 0;
> -  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> -    {
> -      unicode = 0;
> -      /* fall-through */ 
> -    }
>  #ifdef _MB_CAPABLE
> -  else if (!strcmp (__lc_ctype, "C-JIS"))
> +  int unicode = 0;
> +
> +  if (!strcmp (__locale_charset (), "JIS"))
>      {
>        c = __jp2uc (c, JP_JIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-SJIS"))
> +  else if (!strcmp (__locale_charset (), "SJIS"))
>      {
>        c = __jp2uc (c, JP_SJIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-EUCJP"))
> +  else if (!strcmp (__locale_charset (), "EUCJP"))
>      {
>        c = __jp2uc (c, JP_EUCJP);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-UTF-8"))
> +  else if (!strcmp (__locale_charset (), "UTF-8"))
>      {
>        unicode = 1;
>      }
> Index: libc/ctype/iswpunct.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswpunct.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswpunct.c
> --- libc/ctype/iswpunct.c	28 Oct 2005 21:33:22 -0000	1.4
> +++ libc/ctype/iswpunct.c	27 Feb 2009 10:40:19 -0000
> @@ -69,29 +69,25 @@ No supporting OS subroutines are require
>  int
>  _DEFUN(iswpunct,(c), wint_t c)
>  {
> -  int unicode = 0;
> -  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> -    {
> -      unicode = 0;
> -      /* fall-through */ 
> -    }
>  #ifdef _MB_CAPABLE
> -  else if (!strcmp (__lc_ctype, "C-JIS"))
> +  int unicode = 0;
> +
> +  if (!strcmp (__locale_charset (), "JIS"))
>      {
>        c = __jp2uc (c, JP_JIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-SJIS"))
> +  else if (!strcmp (__locale_charset (), "SJIS"))
>      {
>        c = __jp2uc (c, JP_SJIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-EUCJP"))
> +  else if (!strcmp (__locale_charset (), "EUCJP"))
>      {
>        c = __jp2uc (c, JP_EUCJP);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-UTF-8"))
> +  else if (!strcmp (__locale_charset (), "UTF-8"))
>      {
>        unicode = 1;
>      }
> Index: libc/ctype/iswspace.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/iswspace.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 iswspace.c
> --- libc/ctype/iswspace.c	28 Oct 2005 21:33:22 -0000	1.4
> +++ libc/ctype/iswspace.c	27 Feb 2009 10:40:19 -0000
> @@ -65,29 +65,25 @@ No supporting OS subroutines are require
>  int
>  _DEFUN(iswspace,(c), wint_t c)
>  {
> -  int unicode = 0;
> -  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> -    {
> -      unicode = 0;
> -      /* fall-through */ 
> -    }
>  #ifdef _MB_CAPABLE
> -  else if (!strcmp (__lc_ctype, "C-JIS"))
> +  int unicode = 0;
> +
> +  if (!strcmp (__locale_charset (), "JIS"))
>      {
>        c = __jp2uc (c, JP_JIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-SJIS"))
> +  else if (!strcmp (__locale_charset (), "SJIS"))
>      {
>        c = __jp2uc (c, JP_SJIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-EUCJP"))
> +  else if (!strcmp (__locale_charset (), "EUCJP"))
>      {
>        c = __jp2uc (c, JP_EUCJP);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-UTF-8"))
> +  else if (!strcmp (__locale_charset (), "UTF-8"))
>      {
>        unicode = 1;
>      }
> Index: libc/ctype/local.h
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/local.h,v
> retrieving revision 1.1
> diff -u -p -r1.1 local.h
> --- libc/ctype/local.h	20 Sep 2002 20:13:10 -0000	1.1
> +++ libc/ctype/local.h	27 Feb 2009 10:40:19 -0000
> @@ -20,7 +20,7 @@
>  #define WC_UPPER	11
>  #define WC_XDIGIT	12
>  
> -extern char __lc_ctype[12];
> +extern char *__locale_charset ();
>  
>  /* Japanese encoding types supported */
>  #define JP_JIS		1
> Index: libc/ctype/towlower.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/towlower.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 towlower.c
> --- libc/ctype/towlower.c	28 Oct 2005 21:33:22 -0000	1.4
> +++ libc/ctype/towlower.c	27 Feb 2009 10:40:19 -0000
> @@ -69,30 +69,25 @@ No supporting OS subroutines are require
>  wint_t
>  _DEFUN(towlower,(c), wint_t c)
>  {
> +#ifdef _MB_CAPABLE
>    int unicode = 0;
>  
> -  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> -    {
> -      unicode = 0;
> -      /* fall-through */ 
> -    }
> -#ifdef _MB_CAPABLE
> -  else if (!strcmp (__lc_ctype, "C-JIS"))
> +  if (!strcmp (__locale_charset (), "JIS"))
>      {
>        c = __jp2uc (c, JP_JIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-SJIS"))
> +  else if (!strcmp (__locale_charset (), "SJIS"))
>      {
>        c = __jp2uc (c, JP_SJIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-EUCJP"))
> +  else if (!strcmp (__locale_charset (), "EUCJP"))
>      {
>        c = __jp2uc (c, JP_EUCJP);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-UTF-8"))
> +  else if (!strcmp (__locale_charset (), "UTF-8"))
>      {
>        unicode = 1;
>      }
> Index: libc/ctype/towupper.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/ctype/towupper.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 towupper.c
> --- libc/ctype/towupper.c	28 Oct 2005 21:33:22 -0000	1.4
> +++ libc/ctype/towupper.c	27 Feb 2009 10:40:19 -0000
> @@ -69,30 +69,25 @@ No supporting OS subroutines are require
>  wint_t
>  _DEFUN(towupper,(c), wint_t c)
>  {
> +#ifdef _MB_CAPABLE
>    int unicode = 0;
>  
> -  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
> -    {
> -      unicode = 0;
> -      /* fall-through */ 
> -    }
> -#ifdef _MB_CAPABLE
> -  else if (!strcmp (__lc_ctype, "C-JIS"))
> +  if (!strcmp (__locale_charset (), "JIS"))
>      {
>        c = __jp2uc (c, JP_JIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-SJIS"))
> +  else if (!strcmp (__locale_charset (), "SJIS"))
>      {
>        c = __jp2uc (c, JP_SJIS);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-EUCJP"))
> +  else if (!strcmp (__locale_charset (), "EUCJP"))
>      {
>        c = __jp2uc (c, JP_EUCJP);
>        unicode = 1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-UTF-8"))
> +  else if (!strcmp (__locale_charset (), "UTF-8"))
>      {
>        unicode = 1;
>      }
> Index: libc/locale/locale.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/locale/locale.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 locale.c
> --- libc/locale/locale.c	23 Apr 2004 21:44:21 -0000	1.8
> +++ libc/locale/locale.c	27 Feb 2009 10:40:19 -0000
> @@ -42,13 +42,16 @@ execution environment for international 
>  information; <<localeconv>> reports on the settings of the current
>  locale.
>  
> -This is a minimal implementation, supporting only the required <<"C">>
> -value for <[locale]>; strings representing other locales are not
> -honored unless _MB_CAPABLE is defined in which case three new
> -extensions are allowed for LC_CTYPE or LC_MESSAGES only: <<"C-JIS">>, 
> -<<"C-EUCJP">>, <<"C-SJIS">>, or <<"C-ISO-8859-1">>.  (<<"">> is 
> -also accepted; it represents the default locale
> -for an implementation, here equivalent to <<"C">>.)
> +This is a minimal implementation, supporting only the required <<"POSIX">>
> +and <<"C">> values for <[locale]>; strings representing other locales are not
> +honored unless _MB_CAPABLE is defined in which case POSIX locale strings
> +are allowed, plus five extensions supported for backward compatibility with
> +older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>,
> +<<"C-SJIS">>, or <<"C-ISO-8859-x">> with 1 <= x <= 15.  Even when using
> +POSIX locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>,
> +<<"EUCJP">>, <<"SJIS">>, or <<"ISO-8859-x">> with 1 <= x <= 15.  (<<"">> is 
> +also accepted; if given, the settings are read from the corresponding
> +LC_* environment variables and $LANG according to POSIX rules.)
>  
>  If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns
>  a pointer to the string representing the current locale (always
> @@ -66,9 +69,13 @@ in effect.  
>  <[reent]> is a pointer to a reentrancy structure.
>  
>  RETURNS
> -<<setlocale>> returns either a pointer to a string naming the locale
> -currently in effect (always <<"C">> for this implementation, or, if
> -the locale request cannot be honored, <<NULL>>.
> +A successful call to <<setlocale>> returns a pointer to a string
> +associated with the specified category for the new locale.  The string
> +returned by <<setlocale>> is such that a subsequent call using that
> +string will restore that category (or all categories in case of LC_ALL),
> +to that state.  The application shall not modify the string returned
> +which may be overwritten by a subsequent call to <<setlocale>>.
> +On error, <<setlocale>> returns <<NULL>>.
>  
>  <<localeconv>> returns a pointer to a structure of type <<lconv>>,
>  which describes the formatting and collating conventions in effect (in
> @@ -81,16 +88,50 @@ implementations is the C locale.
>  No supporting OS subroutines are required.
>  */
>  
> +/* Parts of this code are originally taken from FreeBSD. */
>  /*
> - * setlocale, localeconv : internationalize your locale.
> - *                         (Only "C" or null supported).
> + * Copyright (c) 1996 - 2002 FreeBSD Project
> + * Copyright (c) 1991, 1993
> + *      The Regents of the University of California.  All rights reserved.
> + *
> + * This code is derived from software contributed to Berkeley by
> + * Paul Borman at Krystal Technologies.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + * 4. Neither the name of the University nor the names of its contributors
> + *    may be used to endorse or promote products derived from this software
> + *    without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
>   */
>  
>  #include <newlib.h>
> +#include <errno.h>
>  #include <locale.h>
>  #include <string.h>
>  #include <limits.h>
>  #include <reent.h>
> +#include <stdlib.h>
> +
> +#define _LC_LAST      7
> +#define ENCODING_LEN 31
>  
>  #ifdef __CYGWIN__
>  int __declspec(dllexport) __mb_cur_max = 1;
> @@ -109,11 +150,48 @@ static _CONST struct lconv lconv = 
>    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
>  };
>  
> +#ifdef _MB_CAPABLE
> +/*
> + * Category names for getenv()
> + */
> +static char *categories[_LC_LAST] = {
> +  "LC_ALL",
> +  "LC_COLLATE",
> +  "LC_CTYPE",
> +  "LC_MONETARY",
> +  "LC_NUMERIC",
> +  "LC_TIME",
> +  "LC_MESSAGES",
> +};
>  
> -char * _EXFUN(__locale_charset,(_VOID));
> +/*
> + * Current locales for each category
> + */
> +static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
> +    "C",
> +    "C",
> +    "C",
> +    "C",
> +    "C",
> +    "C",
> +    "C",
> +};
>  
> -static char *charset = "ISO-8859-1";
> -char __lc_ctype[12] = "C";
> +/*
> + * The locales we are going to try and load
> + */
> +static char new_categories[_LC_LAST][ENCODING_LEN + 1];
> +static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
> +
> +static char current_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];
> +static char *currentlocale(void);
> +static char *loadlocale(struct _reent *, int);
> +static const char *__get_locale_env(struct _reent *, int);
> +
> +#endif
> +
> +static char lc_ctype_charset[ENCODING_LEN + 1] = "ISO-8859-1";
> +static char lc_message_charset[ENCODING_LEN + 1] = "ISO-8859-1";
>  
>  char *
>  _DEFUN(_setlocale_r, (p, category, locale),
> @@ -124,154 +202,303 @@ _DEFUN(_setlocale_r, (p, category, local
>  #ifndef _MB_CAPABLE
>    if (locale)
>      { 
> -      if (strcmp (locale, "C") && strcmp (locale, ""))
> -        return 0;
> +      if (strcmp (locale, "POSIX") && strcmp (locale, "C")
> +	  && strcmp (locale, ""))
> +        return NULL;
>        p->_current_category = category;  
>        p->_current_locale = locale;
>      }
>    return "C";
>  #else
> -  static char last_lc_ctype[12] = "C";
> -  static char lc_messages[12] = "C";
> -  static char last_lc_messages[12] = "C";
> +  int i, j, len, saverr;
> +  const char *env, *r;
>  
> -  if (locale)
> +  if (category < LC_ALL || category >= _LC_LAST)
>      {
> -      char *locale_name = (char *)locale;
> -      if (category != LC_CTYPE && category != LC_MESSAGES) 
> -        { 
> -          if (strcmp (locale, "C") && strcmp (locale, ""))
> -            return 0;
> -          if (category == LC_ALL)
> -            {
> -              strcpy (last_lc_ctype, __lc_ctype);
> -              strcpy (__lc_ctype, "C");
> -              strcpy (last_lc_messages, lc_messages);
> -              strcpy (lc_messages, "C");
> -              __mb_cur_max = 1;
> -            }
> -        }
> -      else
> -        {
> -          if (locale[0] == 'C' && locale[1] == '-')
> -            {
> -              switch (locale[2])
> -                {
> -                case 'U':
> -                  if (strcmp (locale, "C-UTF-8"))
> -                    return 0;
> -                break;
> -                case 'J':
> -                  if (strcmp (locale, "C-JIS"))
> -                    return 0;
> -                break;
> -                case 'E':
> -                  if (strcmp (locale, "C-EUCJP"))
> -                    return 0;
> -                break;
> -                case 'S':
> -                  if (strcmp (locale, "C-SJIS"))
> -                    return 0;
> -                break;
> -                case 'I':
> -                  if (strcmp (locale, "C-ISO-8859-1"))
> -                    return 0;
> -                break;
> -                default:
> -                  return 0;
> -                }
> -            }
> -          else 
> -            {
> -              if (strcmp (locale, "C") && strcmp (locale, ""))
> -                return 0;
> -              locale_name = "C"; /* C is always the default locale */
> -            }
> -
> -          if (category == LC_CTYPE)
> -            {
> -              strcpy (last_lc_ctype, __lc_ctype);
> -              strcpy (__lc_ctype, locale_name);
> -
> -              __mb_cur_max = 1;
> -              if (locale[1] == '-')
> -                {
> -                  switch (locale[2])
> -                    {
> -                    case 'U':
> -                      __mb_cur_max = 6;
> -                    break;
> -                    case 'J':
> -                      __mb_cur_max = 8;
> -                    break;
> -                    case 'E':
> -                      __mb_cur_max = 2;
> -                    break;
> -                    case 'S':
> -                      __mb_cur_max = 2;
> -                    break;
> -                    case 'I':
> -                    default:
> -                      __mb_cur_max = 1;
> -                    }
> -                }
> -            }
> -          else
> -            {
> -              strcpy (last_lc_messages, lc_messages);
> -              strcpy (lc_messages, locale_name);
> -
> -              charset = "ISO-8859-1";
> -              if (locale[1] == '-')
> -                {
> -                  switch (locale[2])
> -                    {
> -                    case 'U':
> -                      charset = "UTF-8";
> -                    break;
> -                    case 'J':
> -                      charset = "JIS";
> -                    break;
> -                    case 'E':
> -                      charset = "EUCJP";
> -                    break;
> -                    case 'S':
> -                      charset = "SJIS";
> -                    break;
> -                    case 'I':
> -                      charset = "ISO-8859-1";
> -                    break;
> -                    default:
> -                      return 0;
> -                    }
> -                }
> -            }
> -        }
> -      p->_current_category = category;  
> -      p->_current_locale = locale;
> +      p->_errno = EINVAL;
> +      return NULL;
> +    }
> +
> +  if (locale == NULL)
> +    return category != LC_ALL ? current_categories[category] : currentlocale();
>  
> -      if (category == LC_CTYPE)
> -        return last_lc_ctype;
> -      else if (category == LC_MESSAGES)
> -        return last_lc_messages;
> +  /*
> +   * Default to the current locale for everything.
> +   */
> +  for (i = 1; i < _LC_LAST; ++i)
> +    strcpy (new_categories[i], current_categories[i]);
> +
> +  /*
> +   * Now go fill up new_categories from the locale argument
> +   */
> +  if (!*locale)
> +    {
> +      if (category == LC_ALL)
> +	{
> +	  for (i = 1; i < _LC_LAST; ++i)
> +	    {
> +	      env = __get_locale_env (p, i);
> +	      if (strlen (env) > ENCODING_LEN)
> +		{
> +		  p->_errno = EINVAL;
> +		  return NULL;
> +		}
> +	      strcpy (new_categories[i], env);
> +	    }
> +	}
> +      else
> +	{
> +	  env = __get_locale_env (p, category);
> +	  if (strlen (env) > ENCODING_LEN)
> +	    {
> +	      p->_errno = EINVAL;
> +	      return NULL;
> +	    }
> +	  strcpy (new_categories[category], env);
> +	}
> +    }
> +  else if (category != LC_ALL)
> +    {
> +      if (strlen (locale) > ENCODING_LEN)
> +	{
> +	  p->_errno = EINVAL;
> +	  return NULL;
> +	}
> +      strcpy (new_categories[category], locale);
>      }
>    else
>      {
> -      if (category == LC_CTYPE)
> -        return __lc_ctype;
> -      else if (category == LC_MESSAGES)
> -        return lc_messages;
> +      if ((r = strchr (locale, '/')) == NULL)
> +	{
> +	  if (strlen (locale) > ENCODING_LEN)
> +	    {
> +	      p->_errno = EINVAL;
> +	      return NULL;
> +	    }
> +	  for (i = 1; i < _LC_LAST; ++i)
> +	    strcpy (new_categories[i], locale);
> +	}
> +      else
> +	{
> +	  for (i = 1; r[1] == '/'; ++r)
> +	    ;
> +	  if (!r[1])
> +	    {
> +	      p->_errno = EINVAL;
> +	      return NULL;  /* Hmm, just slashes... */
> +	    }
> +	  do
> +	    {
> +	      if (i == _LC_LAST)
> +		break;  /* Too many slashes... */
> +	      if ((len = r - locale) > ENCODING_LEN)
> +		{
> +		  p->_errno = EINVAL;
> +		  return NULL;
> +		}
> +	      strlcpy (new_categories[i], locale, len + 1);
> +	      i++;
> +	      while (*r == '/')
> +		r++;
> +	      locale = r;
> +	      while (*r && *r != '/')
> +		r++;
> +	    }
> +	  while (*locale);
> +	  while (i < _LC_LAST)
> +	    {
> +	      strcpy (new_categories[i], new_categories[i-1]);
> +	      i++;
> +	    }
> +	}
>      }
> - 
> -  return "C";
> +
> +  if (category != LC_ALL)
> +    return loadlocale (p, category);
> +
> +  for (i = 1; i < _LC_LAST; ++i)
> +    {
> +      strcpy (saved_categories[i], current_categories[i]);
> +      if (loadlocale (p, i) == NULL)
> +	{
> +	  saverr = p->_errno;
> +	  for (j = 1; j < i; j++)
> +	    {
> +	      strcpy (new_categories[j], saved_categories[j]);
> +	      if (loadlocale (p, j) == NULL)
> +		{
> +		  strcpy (new_categories[j], "C");
> +		  loadlocale (p, j);
> +		}
> +	    }
> +	  p->_errno = saverr;
> +	  return NULL;
> +	}
> +    }
> +  return currentlocale ();
>  #endif
> +}
> +
> +#ifdef _MB_CAPABLE
> +static char *
> +currentlocale()
> +{
> +        int i;
> +
> +        (void)strcpy(current_locale_string, current_categories[1]);
> +
> +        for (i = 2; i < _LC_LAST; ++i)
> +                if (strcmp(current_categories[1], current_categories[i])) {
> +                        for (i = 2; i < _LC_LAST; ++i) {
> +                                (void)strcat(current_locale_string, "/");
> +                                (void)strcat(current_locale_string,
> +                                             current_categories[i]);
> +                        }
> +                        break;
> +                }
> +        return (current_locale_string);
> +}
> +#endif
> +
> +#ifdef _MB_CAPABLE
> +static char *
> +loadlocale(struct _reent *p, int category)
> +{
> +  /* At this point a full-featured system would just load the locale
> +     specific data from the locale files.
> +     What we do here for now is to check the incoming string for correctness.
> +     The string must be in one of the allowed locale strings, either
> +     one in POSIX-style, or one in the old newlib style to maintain
> +     backward compatibility.  If the locale string is correct, the charset
> +     is extracted and stored in lc_ctype_charset or lc_message_charset
> +     dependent on the category. */
> +  char *locale = new_categories[category];
> +  char charset[ENCODING_LEN + 1];
> +  unsigned long val;
> +  char *end;
> +  int mbc_max;
>    
> +  /* "POSIX" is translated to "C", as on Linux. */
> +  if (!strcmp (locale, "POSIX"))
> +    strcpy (locale, "C");
> +  if (!strcmp (locale, "C"))				/* Default "C" locale */
> +    strcpy (charset, "ISO-8859-1");
> +  else if (locale[0] == 'C' && locale[1] == '-')	/* Old newlib style */
> +	strcpy (charset, locale + 2);
> +  else							/* POSIX style */
> +    {
> +      char *c = locale;
> +
> +      /* Don't use ctype macros here, they might be localized. */
> +      /* Language */
> +      if (c[0] <= 'a' || c[0] >= 'z'
> +	  || c[1] <= 'a' || c[1] >= 'z')
> +	return NULL;
> +      c += 2;
> +      if (c[0] == '_')
> +        {
> +	  /* Territory */
> +	  ++c;
> +	  if (c[0] <= 'A' || c[0] >= 'Z'
> +	      || c[1] <= 'A' || c[1] >= 'Z')
> +	    return NULL;
> +	  c += 2;
> +	}
> +      if (c[0] == '.')
> +	{
> +	  /* Charset */
> +	  strcpy (charset, c + 1);
> +	  if ((c = strchr (charset, '@')))
> +	    /* Strip off modifier */
> +	    *c = '\0';
> +	}
> +      else if (c[0] == '\0' || c[0] == '@')
> +	/* End of string or just a modifier */
> +	strcpy (charset, "ISO-8859-1");
> +      else
> +	/* Invalid string */
> +      	return NULL;
> +    }
> +  /* We only support this subset of charsets. */
> +  switch (charset[0])
> +    {
> +    case 'U':
> +      if (strcmp (charset, "UTF-8"))
> +	return NULL;
> +      mbc_max = 6;
> +    break;
> +    case 'J':
> +      if (strcmp (charset, "JIS"))
> +	return NULL;
> +      mbc_max = 8;
> +    break;
> +    case 'E':
> +      if (strcmp (charset, "EUCJP"))
> +	return NULL;
> +      mbc_max = 2;
> +    break;
> +    case 'S':
> +      if (strcmp (charset, "SJIS"))
> +	return NULL;
> +      mbc_max = 2;
> +    break;
> +    case 'I':
> +    default:
> +      /* Must be exactly one of ISO-8859-1, [...] ISO-8859-15. */
> +      if (strncmp (charset, "ISO-8859-", 9))
> +	return NULL;
> +      val = strtol (charset + 9, &end, 10);
> +      if (val < 1 || val > 15 || *end)
> +	return NULL;
> +      mbc_max = 1;
> +      break;
> +    }
> +  if (category == LC_CTYPE)
> +    {
> +      strcpy (lc_ctype_charset, charset);
> +      __mb_cur_max = mbc_max;
> +    }
> +  else if (category == LC_MESSAGES)
> +    strcpy (lc_message_charset, charset);
> +  p->_current_category = category;  
> +  p->_current_locale = locale;
> +  return strcpy(current_categories[category], new_categories[category]);
> +}
> +
> +static const char *
> +__get_locale_env(struct _reent *p, int category)
> +{
> +  const char *env;
> +
> +  /* 1. check LC_ALL. */
> +  env = _getenv_r (p, categories[0]);
> +
> +  /* 2. check LC_* */
> +  if (env == NULL || !*env)
> +    env = _getenv_r (p, categories[category]);
> +
> +  /* 3. check LANG */
> +  if (env == NULL || !*env)
> +    env = _getenv_r (p, "LANG");
> +
> +  /* 4. if none is set, fall to "C" */
> +  if (env == NULL || !*env)
> +    env = "C";
> +
> +  return env;
>  }
> +#endif
>  
>  char *
>  _DEFUN_VOID(__locale_charset)
>  {
> -  return charset;
> +  return lc_ctype_charset;
> +}
> +
> +char *
> +_DEFUN_VOID(__locale_msgcharset)
> +{
> +  return lc_message_charset;
>  }
>  
>  struct lconv *
> Index: libc/stdlib/mbtowc_r.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/stdlib/mbtowc_r.c,v
> retrieving revision 1.9
> diff -u -p -r1.9 mbtowc_r.c
> --- libc/stdlib/mbtowc_r.c	25 Feb 2009 09:10:09 -0000	1.9
> +++ libc/stdlib/mbtowc_r.c	27 Feb 2009 10:40:20 -0000
> @@ -45,8 +45,6 @@ static JIS_ACTION JIS_action_table[JIS_S
>  /* we override the mbstate_t __count field for more complex encodings and use it store a state value */
>  #define __state __count
>  
> -extern char __lc_ctype[12];
> -
>  int
>  _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
>          struct _reent *r   _AND
> @@ -65,9 +63,9 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
>      return -2;
>  
>  #ifdef _MB_CAPABLE
> -  if (strlen (__lc_ctype) <= 1)
> +  if (strlen (__locale_charset ()) <= 1)
>      { /* fall-through */ }
> -  else if (!strcmp (__lc_ctype, "C-UTF-8"))
> +  else if (!strcmp (__locale_charset (), "UTF-8"))
>      {
>        int ch;
>        int i = 0;
> @@ -221,7 +219,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
>        else
>  	return -1;
>      }      
> -  else if (!strcmp (__lc_ctype, "C-SJIS"))
> +  else if (!strcmp (__locale_charset (), "SJIS"))
>      {
>        int ch;
>        int i = 0;
> @@ -251,7 +249,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
>  	    return -1;
>  	}
>      }
> -  else if (!strcmp (__lc_ctype, "C-EUCJP"))
> +  else if (!strcmp (__locale_charset (), "EUCJP"))
>      {
>        int ch;
>        int i = 0;
> @@ -281,7 +279,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
>  	    return -1;
>  	}
>      }
> -  else if (!strcmp (__lc_ctype, "C-JIS"))
> +  else if (!strcmp (__locale_charset (), "JIS"))
>      {
>        JIS_STATE curr_state;
>        JIS_ACTION action;
> Index: libc/stdlib/wctomb_r.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/stdlib/wctomb_r.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 wctomb_r.c
> --- libc/stdlib/wctomb_r.c	25 Feb 2009 09:10:09 -0000	1.8
> +++ libc/stdlib/wctomb_r.c	27 Feb 2009 10:40:20 -0000
> @@ -7,8 +7,6 @@
>  /* for some conversions, we use the __count field as a place to store a state value */
>  #define __state __count
>  
> -extern char __lc_ctype[12];
> -
>  int
>  _DEFUN (_wctomb_r, (r, s, wchar, state),
>          struct _reent *r     _AND 
> @@ -21,9 +19,9 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
>       is 4, as is the case on cygwin.  */
>    wint_t wchar = _wchar;
>  
> -  if (strlen (__lc_ctype) <= 1)
> +  if (strlen (__locale_charset ()) <= 1)
>      { /* fall-through */ }
> -  else if (!strcmp (__lc_ctype, "C-UTF-8"))
> +  else if (!strcmp (__locale_charset (), "UTF-8"))
>      {
>        if (s == NULL)
>          return 0; /* UTF-8 encoding is not state-dependent */
> @@ -93,9 +91,9 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
>            return 4;
>          }
>        else
> -        return -1;
> +	return -1;
>      }
> -  else if (!strcmp (__lc_ctype, "C-SJIS"))
> +  else if (!strcmp (__locale_charset (), "SJIS"))
>      {
>        unsigned char char2 = (unsigned char)wchar;
>        unsigned char char1 = (unsigned char)(wchar >> 8);
> @@ -113,10 +111,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
>                return 2;
>              }
>            else
> -            return -1;
> +	    return -1;
>          }
>      }
> -  else if (!strcmp (__lc_ctype, "C-EUCJP"))
> +  else if (!strcmp (__locale_charset (), "EUCJP"))
>      {
>        unsigned char char2 = (unsigned char)wchar;
>        unsigned char char1 = (unsigned char)(wchar >> 8);
> @@ -134,10 +132,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
>                return 2;
>              }
>            else
> -            return -1;
> +	    return -1;
>          }
>      }
> -  else if (!strcmp (__lc_ctype, "C-JIS"))
> +  else if (!strcmp (__locale_charset (), "JIS"))
>      {
>        int cnt = 0; 
>        unsigned char char2 = (unsigned char)wchar;
> @@ -165,7 +163,7 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
>                return cnt + 2;
>              }
>            else
> -            return -1;
> +	    return -1;
>          }
>        else
>          {
> Index: libc/sys/linux/intl/loadmsgcat.c
> ===================================================================
> RCS file: /cvs/src/src/newlib/libc/sys/linux/intl/loadmsgcat.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 loadmsgcat.c
> --- libc/sys/linux/intl/loadmsgcat.c	23 Apr 2004 21:44:22 -0000	1.2
> +++ libc/sys/linux/intl/loadmsgcat.c	27 Feb 2009 10:40:20 -0000
> @@ -266,8 +266,8 @@ _nl_init_domain_conv (domain_file, domai
>  		  outcharset = (*_nl_current[LC_CTYPE])->values[_NL_ITEM_INDEX (CODESET)].string;
>  # else
>  #  if HAVE_ICONV
> -		  extern const char *__locale_charset (void);
> -		  outcharset = __locale_charset ();
> +		  extern const char *__locale_msgcharset (void);
> +		  outcharset = __locale_msgcharset ();
>  #  endif
>  # endif
>  		}
>
>
>   



More information about the Newlib mailing list