The old $LANG-in-a-box trick (was Re: X11R7.5 and C.UTF-8)

[redirected to cygwin-developers]

On Dec  3 14:48, Corinna Vinschen wrote:
> On Dec  3 13:16, Andy Koppe wrote:
> > 2009/12/3 Thomas Dickey:
> > >> From the POSIX Base Definitions [link lost in archiving],
> > >> §7.2:
> > >>
> > >> "The tables in Locale Definition describe the characteristics and
> > >> behavior of the POSIX locale for data consisting entirely of
> > >> characters from the portable character set and the control character
> > >> set. For other characters, the behavior is unspecified."
> > >>
> > >> This means that characters 0..127 have to be treated as ASCII, but
> > >> beyond that an implementation can do what it wants. And on Cygwin 1.7,
> > >> plain "C" actually does imply UTF-8, which happily is
> > >> backward-compatible with ASCII.
> > >
> > > That's an interpretation that so far hasn't been blessed by the standards
> > > people.  Any discussion of this topic should mention that, as a caveat.
> > 
> > Fair point. It also means that apps are entitled to assume that "C"
> > supports no more than ASCII, which is why Cygwin 1.7's default locale
> > is C.UTF-8. A default locale setting based on the user's language
> > selection would be better, but we don't have that (yet?).
> Try the attached.  Note:  It has a hidden "--testloop" option...

I created a new, simplified version of my tiny getlocale tool, which
prints "export LANG=..." to stdout, or "setenv LANG ..." if the -c
option has been given.  See attached source.

In theory we could use it in /etc/profile.d/lang.{c}sh:


lang.sh:
  test -z "${LC_ALL:-${LC_CTYPE:-$LANG}}" && eval $(getlocale -U)

lang.csh:
  if ( $?LC_ALL == 0 && $?LC_CTYPE == 0 && $?LANG == 0 ) eval `getlocale -cU`

If that's desired, I could easily create a getlocale package for the
Base category.

One problem we still have is that the ISO 639 language code returned by
Windows is a three-letter code in rare cases (see the --testloop output).
Newlib cannot handle such codes so far.  Maybe the getlocale tool should
fall back to "C" in these cases.


Corinna Vinschen                  Please, send mails regarding Cygwin to
Cygwin Project Co-Leader          cygwin AT cygwin DOT com
Red Hat
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <locale.h>
#include <windows.h>

#define VERSION  "1.1"

extern char *__progname;

version ()
  printf ("%s (Cygwin) %s\n", __progname, VERSION);
  exit (0);

usage (FILE * stream, int status)
  fprintf (stream, "\n\
Usage: %s [-suU] [-l LCID]\n\
Return POSIX LANG identifier corresponding to a locale, default is the\n\
system default locale\n\
Possible options are:\n\
  -c, --csh         return LANG setting in C-shell syntax\n\
  -s, --system      return LANG for the system's default locale\n\
  -u, --user        return LANG for the current user's default locale\n\
  -l, --lcid LCID   return LANG for the LCID given as argument\n\
  -U, --UTF-8       always attach .UTF-8 to LANG\n\
  -h, --help        this text\n\
  -V, --version     print the version of %s and exit\n",
	   __progname, __progname);
  exit (status);

/* Long option table for getopt_long.  Each entry maps a long option to
   the character returned for it.  "testloop" has no short equivalent
   in `opts' below, so it is reachable only as --testloop.  The
   all-zero entry terminates the table.  */
struct option longopts[] = {
  {"csh", no_argument, NULL, 'c'},
  {"system", no_argument, NULL, 's'},
  {"user", no_argument, NULL, 'u'},
  {"lcid", required_argument, NULL, 'l'},
  {"UTF-8", no_argument, NULL, 'U'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'V'},
  {"testloop", no_argument, NULL, 'T'},
  {0, no_argument, NULL, 0}
};

/* Short option string for getopt_long; only -l takes an argument.  */
const char *opts = "csul:UhV";

/* Print the POSIX LANG setting that corresponds to the Windows locale
   LCID.

   LCID  Windows locale identifier to look up.
   UTF   If TRUE, always append ".UTF-8" to the locale name.
   CSH   If TRUE, print "setenv LANG ..." instead of "export LANG=...".
   TEST  If TRUE, print a diagnostic line of the form
	   0x<lcid>="<lang>_<country>", <English language> (<English country>)
	 instead of a shell command, and stay silent if the locale does
	 not exist.

   Returns 0 on success, 2 if any of the locale lookups failed.  */
int
getlocale (LCID lcid, BOOL utf, BOOL csh, BOOL test)
{
  UINT codepage;
  wchar_t iso639[10];
  wchar_t iso3166[10];

  /* NOTE(review): the LCTYPE argument of the first call was missing
     from the source this was restored from.  The ANSI default codepage
     is assumed because the code below treats a codepage of 0 as "no
     codepage, use UTF-8" -- confirm against the original attachment.
     With LOCALE_RETURN_NUMBER the buffer size is given in WCHARs, not
     bytes.  */
  if (!GetLocaleInfoW (lcid,
		       LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
		       (PWCHAR) &codepage, sizeof codepage / sizeof (WCHAR))
      || !GetLocaleInfoW (lcid, LOCALE_SISO639LANGNAME, iso639, 10)
      || !GetLocaleInfoW (lcid, LOCALE_SISO3166CTRYNAME, iso3166, 10))
    {
      if (!test)
	fprintf (stderr, "%s: Non existant locale\n", __progname);
      return 2;
    }
  /* A codepage of 0 selects the ".UTF-8" suffix below.  */
  if (utf)
    codepage = 0;
  if (test)
    {
      /* Start out empty so a failed lookup prints nothing rather than
	 uninitialized memory.  */
      wchar_t cty[256] = L"";
      wchar_t lang[256] = L"";

      GetLocaleInfoW (lcid, LOCALE_SENGCOUNTRY, cty, 256);
      GetLocaleInfoW (lcid, LOCALE_SENGLANGUAGE, lang, 256);
      printf ("0x%04x=\"%ls_%ls\", %ls (%ls)\n", (unsigned) lcid, iso639,
	      iso3166, lang, cty);
      return 0;
    }
  else if (csh)
    printf ("setenv LANG ");
  else
    printf ("export LANG=");
  printf ("\"%ls_%ls%s\"\n", iso639, iso3166, codepage ? "" : ".UTF-8");
  return 0;
}

int main (int argc, char **argv)
  int opt;
  BOOL csh = FALSE;
  BOOL utf = FALSE;
  BOOL test = FALSE;

  setlocale (LC_ALL, "");
  while ((opt = getopt_long (argc, argv, opts, longopts, NULL)) != EOF)
    switch (opt)
      case 's':
      case 'u':
      case 'c':
	csh = TRUE;
      case 'l':
	lcid = strtoul (optarg, NULL, 0);
      case 'U':
	utf = TRUE;
      case 'h':
	usage (stdout, 0);
      case 'V':
	version ();
      case 'T':
        test = TRUE;
	usage (stderr, 1);
  if (test)
      unsigned lang, sublang;

      for (lang = 1; lang <= 0x3ff; ++lang)
	for (sublang = 1; sublang <= 0x3f; ++sublang)
	  getlocale ((sublang << 10) | lang, FALSE, FALSE, TRUE);
      return 0;
  return getlocale (lcid, utf, csh, FALSE);

