This is the mail archive of the cygwin-developers mailing list for the Cygwin project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

The old $LANG-in-a-box trick (was Re: X11R7.5 and C.UTF-8)



[redirected to cygwin-developers]


On Dec  3 14:48, Corinna Vinschen wrote:
> On Dec  3 13:16, Andy Koppe wrote:
> > 2009/12/3 Thomas Dickey:
> > >> From
> > >> http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html,
> > >> §7.2:
> > >>
> > >> "The tables in Locale Definition describe the characteristics and
> > >> behavior of the POSIX locale for data consisting entirely of
> > >> characters from the portable character set and the control character
> > >> set. For other characters, the behavior is unspecified."
> > >>
> > >> This means that characters 0..127 have to be treated as ASCII, but
> > >> beyond that an implementation can do what it wants. And on Cygwin 1.7,
> > >> plain "C" actually does imply UTF-8, which happily is
> > >> backward-compatible with ASCII.
> > >
> > > That's an interpretation that so far hasn't been blessed by the standards
> > > people.  Any discussion of this topic should mention that, as a caveat.
> > 
> > Fair point. It also means that apps are entitled to assume that "C"
> > supports no more than ASCII, which is why Cygwin 1.7's default locale
> > is C.UTF-8. A default locale setting based on the user's language
> > selection would be better, but we don't have that (yet?).
> 
> Try the attached.  Note:  It has a hidden "--testloop" option...

I created a new, simplified version of my tiny getlocale tool, which
prints "export LANG=..." to stdout, or "setenv LANG ..." if the -c
option has been given.  See attached source.

In theory we could use it in /etc/profile.d/lang.{c}sh:

/etc/profile.d/lang.sh:

  test -z "${LC_ALL:-${LC_CTYPE:-$LANG}}" && eval $(getlocale -U)

/etc/profile.d/lang.csh:

  if ( $?LC_ALL == 0 && $?LC_CTYPE == 0 && $?LANG == 0 ) eval `getlocale -cU`

If that's desired, I could easily create a getlocale package for the
Base category.

One problem we still have is that the iso639 language code returned by
Windows is a three-letter code in rare cases (see the --testloop output).
This is not feasible for newlib so far.  Maybe the getlocale tool should
drop back to "C" in these cases.


Corinna

-- 
Corinna Vinschen                  Please, send mails regarding Cygwin to
Cygwin Project Co-Leader          cygwin AT cygwin DOT com
Red Hat
#include <errno.h>
#include <getopt.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>

#define VERSION  "1.1"

extern char *__progname;

/* Report the program name and its version on stdout, then terminate the
   process with exit status 0.  This function does not return.  */
void
version ()
{
  fprintf (stdout, "%s (Cygwin) %s\n", __progname, VERSION);
  exit (0);
}

/* Print the usage/help text to STREAM and terminate the process with exit
   status STATUS.  This function does not return.

   stream  output stream for the help text (stdout for --help, stderr when
	   reporting a bad command line).
   status  exit status passed to exit().  */
void
usage (FILE * stream, int status)
{
  /* The synopsis lists -c as well, so that it matches the option table
     printed below it.  */
  fprintf (stream, "\n\
Usage: %s [-csuU] [-l LCID]\n\
\n\
Return POSIX LANG identifier corresponding to a locale, default is the\n\
system default locale\n\
Possible options are:\n\
\n\
  -c, --csh         return LANG setting in C-shell syntax\n\
  -s, --system      return LANG for the system's default locale\n\
  -u, --user        return LANG for the current user's default locale\n\
  -l, --lcid LCID   return LANG for the LCID given as argument\n\
  -U, --UTF-8       always attach .UTF-8 to LANG\n\
  -h, --help        this text\n\
  -V, --version     print the version of %s and exit\n",
	   __progname, __progname);
  exit (status);
}

/* Long options understood by getopt_long.  Each entry maps to the short
   option character handled in main's switch.  "testloop" has no short
   equivalent in OPTS, so it can only be given in its long form.  */
struct option longopts[] = {
  {"csh",      no_argument,       NULL, 'c'},
  {"system",   no_argument,       NULL, 's'},
  {"user",     no_argument,       NULL, 'u'},
  {"lcid",     required_argument, NULL, 'l'},
  {"UTF-8",    no_argument,       NULL, 'U'},
  {"help",     no_argument,       NULL, 'h'},
  {"version",  no_argument,       NULL, 'V'},
  {"testloop", no_argument,       NULL, 'T'},
  {NULL,       no_argument,       NULL, 0}	/* end-of-table marker */
};

/* Short options; the ':' after 'l' marks it as taking an argument.  */
const char *opts = "csul:UhV";

/* Print the POSIX-style LANG value for the Windows locale LCID on stdout.

   lcid  Windows locale identifier to look up.
   utf   if TRUE, always append ".UTF-8" to the locale name.
   csh   if TRUE, emit C-shell syntax (setenv LANG "...") instead of
	 Bourne-shell syntax (export LANG="...").
   test  if TRUE, print a diagnostic line with the LCID, the ISO 639 and
	 ISO 3166 names and the English language/country names instead of
	 a shell command, and stay silent when the lookup fails.

   Returns 0 on success, 2 if the locale information cannot be fetched.  */
int
getlocale (LCID lcid, BOOL utf, BOOL csh, BOOL test)
{
  UINT codepage;
  wchar_t iso639[10];
  wchar_t iso3166[10];

  /* GetLocaleInfoW takes the buffer size as a count of wide characters,
     not bytes, so the numeric result buffer is sized in wchar_t units.  */
  if (!GetLocaleInfoW (lcid,
		       LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
		       (PWCHAR) &codepage,
		       sizeof codepage / sizeof (wchar_t))
      || !GetLocaleInfoW (lcid, LOCALE_SISO639LANGNAME, iso639,
			  sizeof iso639 / sizeof iso639[0])
      || !GetLocaleInfoW (lcid, LOCALE_SISO3166CTRYNAME, iso3166,
			  sizeof iso3166 / sizeof iso3166[0]))
    {
      if (!test)
        fprintf (stderr, "%s: Non existant locale\n", __progname);
      return 2;
    }
  /* A codepage value of 0 selects the ".UTF-8" suffix below; -U forces it.
     NOTE(review): presumably Windows reports 0 for locales that have no
     ANSI codepage -- confirm against the GetLocaleInfo documentation.  */
  if (utf)
    codepage = 0;
  if (test)
    {
      wchar_t cty[256];
      wchar_t lang[256];

      /* Fall back to empty strings rather than printing uninitialised
	 buffers if either of the English names is unavailable.  */
      if (!GetLocaleInfoW (lcid, LOCALE_SENGCOUNTRY, cty,
			   sizeof cty / sizeof cty[0]))
	cty[0] = L'\0';
      if (!GetLocaleInfoW (lcid, LOCALE_SENGLANGUAGE, lang,
			   sizeof lang / sizeof lang[0]))
	lang[0] = L'\0';
      printf ("0x%04x=\"%ls_%ls\", %ls (%ls)\n", (unsigned) lcid, iso639,
	      iso3166, lang, cty);
      return 0;
    }
  else if (csh)
    printf ("setenv LANG ");
  else
    printf ("export LANG=");
  printf ("\"%ls_%ls%s\"\n", iso639, iso3166, codepage ? "" : ".UTF-8");
  return 0;
}

int main (int argc, char **argv)
{
  int opt;
  LCID lcid = LOCALE_SYSTEM_DEFAULT;
  BOOL csh = FALSE;
  BOOL utf = FALSE;
  BOOL test = FALSE;

  setlocale (LC_ALL, "");
  while ((opt = getopt_long (argc, argv, opts, longopts, NULL)) != EOF)
    switch (opt)
      {
      case 's':
	lcid = LOCALE_SYSTEM_DEFAULT;
	break;
      case 'u':
	lcid = LOCALE_USER_DEFAULT;
	break;
      case 'c':
	csh = TRUE;
	break;
      case 'l':
	lcid = strtoul (optarg, NULL, 0);
	break;
      case 'U':
	utf = TRUE;
	break;
      case 'h':
	usage (stdout, 0);
	break;
      case 'V':
	version ();
	break;
      case 'T':
        test = TRUE;
	break;
      default:
	usage (stderr, 1);
	break;
      }
  if (test)
    {
      unsigned lang, sublang;

      for (lang = 1; lang <= 0x3ff; ++lang)
	for (sublang = 1; sublang <= 0x3f; ++sublang)
	  getlocale ((sublang << 10) | lang, FALSE, FALSE, TRUE);
      return 0;
    }
  return getlocale (lcid, utf, csh, FALSE);
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]