[newlib-cygwin/main] Cygwin: add more UTF-32 helper functions

Corinna Vinschen corinna@sourceware.org
Mon Feb 20 22:00:59 GMT 2023


https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=ffba9604d10fa0df27eea5e1930a215086608581

commit ffba9604d10fa0df27eea5e1930a215086608581
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Mon Feb 20 22:29:37 2023 +0100
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Mon Feb 20 22:29:37 2023 +0100

    Cygwin: add more UTF-32 helper functions
    
    wcintowcs: convert UTF-32 string to UTF-16 string
    wcilen: return number of characters in a UTF-32 string
    wcicmp: compare two UTF-32 strings
    wcincmp: compare two fixed-size UTF-32 strings
    
    Used in followup patches introducing collating symbols
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/local_includes/wchar.h | 46 ++++++++++++++++++++++++++++++++++++
 winsup/cygwin/strfuncs.cc            | 17 +++++++++++++
 2 files changed, 63 insertions(+)

diff --git a/winsup/cygwin/local_includes/wchar.h b/winsup/cygwin/local_includes/wchar.h
index 10b44791c505..6f2a4ad10f20 100644
--- a/winsup/cygwin/local_includes/wchar.h
+++ b/winsup/cygwin/local_includes/wchar.h
@@ -39,10 +39,56 @@ extern wctomb_f __utf8_wctomb;
 
 #define __WCTOMB (__get_current_locale ()->wctomb)
 
+/* Convert a wint_t (UTF-32) string to a wchar_t string.  Make sure dest has
+   room for at least twice as many characters as are converted, to account
+   for surrogate pairs, plus a wchar_t NUL.  Defined in strfuncs.cc */
+void wcintowcs (wchar_t *dest, wint_t *src, size_t len);
+
 /* replacement function for mbrtowc, returning a wint_t representing
    a UTF-32 value. Defined in strfuncs.cc */
 extern size_t mbrtowi (wint_t *, const char *, size_t, mbstate_t *);
 
+/* Number of wint_t elements before the terminating 0, like wcslen for
+   wchar_t.  A NULL wcs counts as empty and yields 0. */
+static inline size_t
+wcilen (const wint_t *wcs)
+{
+  size_t idx = 0;
+
+  while (wcs && wcs[idx])
+    ++idx;
+  return idx;
+}
+
+/* Compare two 0-terminated wint_t strings, like wcscmp does for wchar_t.
+   Returns 0 if equal, else the difference of the first mismatching pair. */
+static inline int
+wcicmp (const wint_t *s1, const wint_t *s2)
+{
+  while (*s1 != 0 && *s1 == *s2)
+    ++s1, ++s2;
+  return *s1 - *s2;
+}
+
+/* Compare at most n elements of two wint_t strings, like wcsncmp does for
+   wchar_t.  Comparing stops at the first mismatch or after a 0 element
+   has been compared.  Returns 0 if n is 0 or no mismatch was found in
+   the compared range, otherwise the difference of the first mismatching
+   pair (element of s1 minus element of s2). */
+static inline int
+wcincmp (const wint_t *s1, const wint_t *s2, size_t n)
+{
+  for (size_t i = 0; i < n; ++i)
+    {
+      if (s1[i] != s2[i])
+        return s1[i] - s2[i];
+      /* Elements are equal here; a 0 ends both strings. */
+      if (s1[i] == 0)
+        break;
+    }
+  return 0;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index d62179a1de06..e343a2fcc6e8 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -112,6 +112,23 @@ transform_chars_af_unix (PWCHAR out, const char *path, __socklen_t len)
   return out;
 }
 
+/* Convert up to len elements of the wint_t (UTF-32) string src to wchar_t,
+   stopping at a 0 element.  Values > 0xffff become a surrogate pair, so
+   dest needs room for 2 * len + 1 wchar_t.  src[len] is never read. */
+extern "C" void
+wcintowcs (wchar_t *dest, wint_t *src, size_t len)
+{
+  for (; len > 0 && *src; --len, ++src)
+    if (*src > 0xffff)
+      {
+	*dest++ = ((*src - 0x10000) >> 10) + 0xd800;
+	*dest++ = ((*src - 0x10000) & 0x3ff) + 0xdc00;
+      }
+    else
+      *dest++ = *src;
+  *dest = L'\0';
+}
+
 /* replacement function for mbrtowc, returning a wint_t representing
    a UTF-32 value. */
 extern "C" size_t


More information about the Cygwin-cvs mailing list