[newlib-cygwin] Cygwin: pty: move codepage evaluation to nlsfuncs.cc

Corinna Vinschen corinna@sourceware.org
Tue Sep 8 08:36:22 GMT 2020


https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=eaed594d736259c42affa7cf58f12492cbc5fb67

commit eaed594d736259c42affa7cf58f12492cbc5fb67
Author: Corinna Vinschen <corinna@vinschen.de>
Date:   Tue Sep 8 10:36:04 2020 +0200

    Cygwin: pty: move codepage evaluation to nlsfuncs.cc
    
    The new function __eval_codepage_from_internal_charset
    is a simplified version of the former code in
    fhandler_tty.cc.  It probably needs some extension,
    but the gist is to use knowledge of internals to
    be as quick as possible.
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/fhandler_tty.cc | 191 ++----------------------------------------
 winsup/cygwin/nlsfuncs.cc     |  48 +++++++++++
 2 files changed, 57 insertions(+), 182 deletions(-)

diff --git a/winsup/cygwin/fhandler_tty.cc b/winsup/cygwin/fhandler_tty.cc
index 8bf39c3e6..6de591d9b 100644
--- a/winsup/cygwin/fhandler_tty.cc
+++ b/winsup/cygwin/fhandler_tty.cc
@@ -1614,8 +1614,8 @@ fhandler_pty_master::write (const void *ptr, size_t len)
   if (to_be_read_from_pcon () && get_ttyp ()->h_pseudo_console)
     {
       size_t nlen;
-      char *buf = convert_mb_str
-	(CP_UTF8, &nlen, get_ttyp ()->term_code_page, (const char *) ptr, len);
+      char *buf = convert_mb_str (CP_UTF8, &nlen, get_ttyp ()->term_code_page,
+				  (const char *) ptr, len);
 
       WaitForSingleObject (input_mutex, INFINITE);
 
@@ -1782,183 +1782,13 @@ fhandler_pty_common::set_close_on_exec (bool val)
   close_on_exec (val);
 }
 
-/* This table is borrowed from mintty: charset.c */
-static const struct {
-  UINT cp;
-  const char *name;
-}
-cs_names[] = {
-  { CP_UTF8, "UTF-8"},
-  { CP_UTF8, "UTF8"},
-  {   20127, "ASCII"},
-  {   20127, "US-ASCII"},
-  {   20127, "ANSI_X3.4-1968"},
-  {   20866, "KOI8-R"},
-  {   20866, "KOI8R"},
-  {   20866, "KOI8"},
-  {   21866, "KOI8-U"},
-  {   21866, "KOI8U"},
-  {   20932, "EUCJP"},
-  {   20932, "EUC-JP"},
-  {     874, "TIS620"},
-  {     874, "TIS-620"},
-  {     932, "SJIS"},
-  {     936, "GBK"},
-  {     936, "GB2312"},
-  {     936, "EUCCN"},
-  {     936, "EUC-CN"},
-  {     949, "EUCKR"},
-  {     949, "EUC-KR"},
-  {     950, "BIG5"},
-  {       0, "NULL"}
-};
-
-static void
-get_locale_from_env (char *locale)
-{
-  const char *env = NULL;
-  char lang[ENCODING_LEN + 1] = {0, }, country[ENCODING_LEN + 1] = {0, };
-  env = getenv ("LC_ALL");
-  if (env == NULL || !*env)
-    env = getenv ("LC_CTYPE");
-  if (env == NULL || !*env)
-    env = getenv ("LANG");
-  if (env == NULL || !*env)
-    {
-      if (GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT,
-			  LOCALE_SISO639LANGNAME,
-			  lang, sizeof (lang)))
-	GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT,
-		       LOCALE_SISO3166CTRYNAME,
-		       country, sizeof (country));
-      else if (GetLocaleInfo (LOCALE_CUSTOM_DEFAULT,
-			      LOCALE_SISO639LANGNAME,
-			      lang, sizeof (lang)))
-	  GetLocaleInfo (LOCALE_CUSTOM_DEFAULT,
-			 LOCALE_SISO3166CTRYNAME,
-			 country, sizeof (country));
-      else if (GetLocaleInfo (LOCALE_USER_DEFAULT,
-			      LOCALE_SISO639LANGNAME,
-			      lang, sizeof (lang)))
-	  GetLocaleInfo (LOCALE_USER_DEFAULT,
-			 LOCALE_SISO3166CTRYNAME,
-			 country, sizeof (country));
-      else if (GetLocaleInfo (LOCALE_SYSTEM_DEFAULT,
-			      LOCALE_SISO639LANGNAME,
-			      lang, sizeof (lang)))
-	  GetLocaleInfo (LOCALE_SYSTEM_DEFAULT,
-			 LOCALE_SISO3166CTRYNAME,
-			 country, sizeof (country));
-      if (strlen (lang) && strlen (country))
-	__small_sprintf (lang + strlen (lang), "_%s.UTF-8", country);
-      else
-	strcpy (lang , "C.UTF-8");
-      env = lang;
-    }
-  strcpy (locale, env);
-}
-
-static void
-get_langinfo (char *locale_out, char *charset_out)
-{
-  /* Get locale from environment */
-  char new_locale[ENCODING_LEN + 1];
-  get_locale_from_env (new_locale);
-
-  __locale_t loc;
-  memset (&loc, 0, sizeof (loc));
-  const char *locale = __loadlocale (&loc, LC_CTYPE, new_locale);
-  if (!locale)
-    locale = "C";
-
-  const char *charset;
-  struct lc_ctype_T *lc_ctype = (struct lc_ctype_T *) loc.lc_cat[LC_CTYPE].ptr;
-  if (!lc_ctype)
-    charset = "ASCII";
-  else
-    charset = lc_ctype->codeset;
-
-  /* The following code is borrowed from nl_langinfo()
-     in newlib/libc/locale/nl_langinfo.c */
-  /* Convert charset to Linux compatible codeset string. */
-  if (charset[0] == 'A'/*SCII*/)
-    charset = "ANSI_X3.4-1968";
-  else if (charset[0] == 'E')
-    {
-      if (strcmp (charset, "EUCJP") == 0)
-	charset = "EUC-JP";
-      else if (strcmp (charset, "EUCKR") == 0)
-	charset = "EUC-KR";
-      else if (strcmp (charset, "EUCCN") == 0)
-	charset = "GB2312";
-    }
-  else if (charset[0] == 'C'/*Pxxxx*/)
-    {
-      if (strcmp (charset + 2, "874") == 0)
-	charset = "TIS-620";
-      else if (strcmp (charset + 2, "20866") == 0)
-	charset = "KOI8-R";
-      else if (strcmp (charset + 2, "21866") == 0)
-	charset = "KOI8-U";
-      else if (strcmp (charset + 2, "101") == 0)
-	charset = "GEORGIAN-PS";
-      else if (strcmp (charset + 2, "102") == 0)
-	charset = "PT154";
-    }
-  else if (charset[0] == 'S'/*JIS*/)
-    {
-      /* Cygwin uses MSFT's implementation of SJIS, which differs
-	 in some codepoints from the real thing, especially
-	 0x5c: yen sign instead of backslash,
-	 0x7e: overline instead of tilde.
-	 We can't use the real SJIS since otherwise Win32
-	 pathnames would become invalid.  OTOH, if we return
-	 "SJIS" here, then libiconv will do mb<->wc conversion
-	 differently to our internal functions.  Therefore we
-	 return what we really implement, CP932.  This is handled
-	 fine by libiconv. */
-      charset = "CP932";
-    }
-
-  /* Set results */
-  strcpy (locale_out, new_locale);
-  strcpy (charset_out, charset);
-}
-
 void
 fhandler_pty_slave::setup_locale (void)
 {
-  if (get_ttyp ()->term_code_page != 0)
-    return;
+  extern UINT __eval_codepage_from_internal_charset ();
 
-  char locale[ENCODING_LEN + 1] = "C";
-  char charset[ENCODING_LEN + 1] = "ASCII";
-  get_langinfo (locale, charset);
-
-  /* Set terminal code page from locale */
-  /* This code is borrowed from mintty: charset.c */
-  get_ttyp ()->term_code_page = 20127; /* Default ASCII */
-  char charset_u[ENCODING_LEN + 1] = {0, };
-  for (int i=0; charset[i] && i<ENCODING_LEN; i++)
-    charset_u[i] = toupper (charset[i]);
-  unsigned int iso;
-  UINT cp = 20127; /* Default for fallback */
-  if (sscanf (charset_u, "ISO-8859-%u", &iso) == 1
-      || sscanf (charset_u, "ISO8859-%u", &iso) == 1
-      || sscanf (charset_u, "ISO8859%u", &iso) == 1)
-    {
-      if (iso && iso <= 16 && iso !=12)
-	get_ttyp ()->term_code_page = 28590 + iso;
-    }
-  else if (sscanf (charset_u, "CP%u", &cp) == 1)
-    get_ttyp ()->term_code_page = cp;
-  else
-    for (int i=0; cs_names[i].cp; i++)
-      if (strcasecmp (charset_u, cs_names[i].name) == 0)
-	{
-	  get_ttyp ()->term_code_page = cs_names[i].cp;
-	  break;
-	}
+  if (!get_ttyp ()->term_code_page)
+    get_ttyp ()->term_code_page = __eval_codepage_from_internal_charset ();
 }
 
 void
@@ -1977,9 +1807,6 @@ fhandler_pty_slave::fixup_after_exec ()
   if (!close_on_exec ())
     fixup_after_fork (NULL);	/* No parent handle required. */
 
-  /* Set locale */
-  setup_locale ();
-
   /* Hook Console API */
 #define DO_HOOK(module, name) \
   if (!name##_Orig) \
@@ -2205,8 +2032,8 @@ fhandler_pty_master::pty_master_fwd_thread ()
 	      state = 0;
 
 	  size_t nlen;
-	  char *buf = convert_mb_str
-	    (get_ttyp ()->term_code_page, &nlen, CP_UTF8, ptr, wlen);
+	  char *buf = convert_mb_str (get_ttyp ()->term_code_page,
+				      &nlen, CP_UTF8, ptr, wlen);
 
 	  ptr = buf;
 	  wlen = rlen = nlen;
@@ -2228,8 +2055,8 @@ fhandler_pty_master::pty_master_fwd_thread ()
 	  continue;
 	}
       size_t nlen;
-      char *buf = convert_mb_str
-	(get_ttyp ()->term_code_page, &nlen, GetConsoleOutputCP (), ptr, wlen);
+      char *buf = convert_mb_str (get_ttyp ()->term_code_page, &nlen,
+				  GetConsoleOutputCP (), ptr, wlen);
 
       ptr = buf;
       wlen = rlen = nlen;
diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc
index 668d7eb9e..a518d2be3 100644
--- a/winsup/cygwin/nlsfuncs.cc
+++ b/winsup/cygwin/nlsfuncs.cc
@@ -1448,6 +1448,54 @@ __set_charset_from_locale (const char *locale, char *charset)
   stpcpy (charset, cs);
 }
 
+/* Called from fhandler_pty_slave::setup_locale.  Set a codepage which
+   reflects the internal charset setting.  This is *not* necessarily the
+   Windows codepage connected to a locale by default, so we have to set
+   this up explicitly. */
+UINT
+__eval_codepage_from_internal_charset ()
+{
+  const char *charset = __locale_charset (__get_global_locale ());
+  UINT codepage = CP_UTF8; /* Default UTF8 */
+
+  /* The internal charset names are well defined, so we can use shortcuts. */
+  switch (charset[0])
+    {
+    case 'B': /* BIG5 */
+      codepage = 950;
+      break;
+    case 'C': /* CPxxx */
+      codepage = strtoul (charset + 2, NULL, 10);
+      break;
+    case 'E': /* EUCxx */
+      switch (charset[3])
+	{
+	case 'J': /* EUCJP */
+	  codepage = 20932;
+	  break;
+	case 'K': /* EUCKR */
+	  codepage = 949;
+	  break;
+	case 'C': /* EUCCN */
+	  codepage = 936;
+	  break;
+	}
+      break;
+    case 'G': /* GBK/GB2312 */
+      codepage = 936;
+      break;
+    case 'I': /* ISO-8859-x */
+      codepage = strtoul (charset + 9, NULL, 10) + 28590;
+      break;
+    case 'S': /* SJIS */
+      codepage = 932;
+      break;
+    default: /* All set to UTF8 already */
+      break;
+    }
+  return codepage;
+}
+
 /* This function is called from newlib's loadlocale if the locale identifier
    was invalid, one way or the other.  It looks for the file


More information about the Cygwin-cvs mailing list