[PATCH 4/6] generated character data for libc/ctype

Thomas Wolff towo@towo.net
Fri Mar 23 19:28:00 GMT 2018


Am 13.03.2018 um 22:41 schrieb Thomas Wolff:
> Am 13.03.2018 um 22:10 schrieb Corinna Vinschen:
>> On Mar  8 00:21, Thomas Wolff wrote:
>>> From 58a9cfcb253165d7073a9ed25e143daa2e979c10 Mon Sep 17 00:00:00 2001
>>> From: Thomas Wolff <towo@towo.net>
>>> Date: Sun, 25 Feb 2018 17:22:34 +0100
>>> Subject: [PATCH 4/6] use generated character data
>>>
>>> ---
>>>   newlib/libc/ctype/towctrans_l.c |  97 +++++++-
>>> ...
>> ...
>>
>> I pushed a patch.
>>
>>
>> Corinna
>>
> Thanks a lot for hot-fixing this. I'll meditate on how this could slip 
> through...
> And I'll also check why this wasn't discovered during my extensive 
> testing.
Thanks again for helping to get this accomplished.
While meditating, I noticed that the bit packing of the case conversion 
entries could use some documentation.
The attached patch adds that documentation (plus some tweaks for consistent 
indentation); there are no functional code changes.
Thomas
-------------- next part --------------
From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Fri, 23 Mar 2018 20:07:22 +0100
Subject: [PATCH] comments to document struct caseconv_entry

Explain the design of the compact (packed) struct caseconv_entry,
in case it needs to be modified for future Unicode versions.
Also tweak indentation for consistency.

---
 newlib/libc/ctype/towctrans_l.c | 108 +++++++++++++++++++++++++---------------
 1 file changed, 69 insertions(+), 39 deletions(-)

diff --git a/newlib/libc/ctype/towctrans_l.c b/newlib/libc/ctype/towctrans_l.c
index eaabd8c..0f210e0 100644
--- a/newlib/libc/ctype/towctrans_l.c
+++ b/newlib/libc/ctype/towctrans_l.c
@@ -4,8 +4,36 @@
 //#include <errno.h>
 #include "local.h"
 
-enum {EVENCAP, ODDCAP};
+/*
+   struct caseconv_entry describes the case conversion behaviour
+   of a range of Unicode characters.
+   It was designed to be compact for a minimal table size.
+   The range is first...first + diff.
+   Conversion behaviour for a character c in the respective range:
+     mode == TOLO	towlower (c) = c + delta
+     mode == TOUP	towupper (c) = c + delta
+     mode == TOBOTH	(titling case characters)
+			towlower (c) = c + 1
+			towupper (c) = c - 1
+     mode == TO1	capital/small letters are alternating
+	delta == EVENCAP	even codes are capital
+	delta == ODDCAP		odd codes are capital
+			(this correlates with an even/odd first range value
+			as of Unicode 10.0 but we do not rely on this)
+   As of Unicode 10.0, the following field lengths are sufficient
+	first: 17 bits
+	diff: 8 bits
+	delta: 17 bits
+	mode: 2 bits
+   The reserve of 4 bits (to limit the struct to 6 bytes)
+   is currently added to the 'first' field;
+   should a future Unicode version make it necessary to expand the others,
+   the 'first' field could be reduced as needed, or larger ranges could
+   be split up (reduce limit max=255 e.g. to max=127 or max=63 in 
+   script mkcaseconv, check increasing table size).
+ */
 enum {TO1, TOLO, TOUP, TOBOTH};
+enum {EVENCAP, ODDCAP};
 static struct caseconv_entry {
   unsigned int first: 21;
   unsigned short diff: 8;
@@ -45,32 +73,33 @@ toulower (wint_t c)
 {
   const struct caseconv_entry * cce =
     bisearch(c, caseconv_table,
-             sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+	     sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
 
   if (cce)
     switch (cce->mode)
       {
-      case TOLO:
-	return c + cce->delta;
-      case TOBOTH:
-	return c + 1;
-      case TO1:
-	switch (cce->delta)
-	  {
-	    case EVENCAP:
-	      if (!(c & 1))
-		return c + 1;
-	      break;
-	    case ODDCAP:
-	      if (c & 1)
-		return c + 1;
-	      break;
-	    default:
-	      break;
-	  }
+	case TOLO:
+	  return c + cce->delta;
+	case TOBOTH:
+	  return c + 1;
+	case TO1:
+	  switch (cce->delta)
+	    {
+	      case EVENCAP:
+		if (!(c & 1))
+		  return c + 1;
+		break;
+	      case ODDCAP:
+		if (c & 1)
+		  return c + 1;
+		break;
+	      default:
+		break;
+	    }
 	default:
 	  break;
       }
+
   return c;
 }
 
@@ -79,32 +108,33 @@ touupper (wint_t c)
 {
   const struct caseconv_entry * cce =
     bisearch(c, caseconv_table,
-             sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+	     sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
 
   if (cce)
     switch (cce->mode)
       {
-      case TOUP:
-	return c + cce->delta;
-      case TOBOTH:
-	return c - 1;
-      case TO1:
-	switch (cce->delta)
-	  {
-	  case EVENCAP:
-	    if (c & 1)
-	      return c - 1;
-	    break;
-	  case ODDCAP:
-	    if (!(c & 1))
-	      return c - 1;
-	    break;
-	  default:
-	    break;
-	  }
+	case TOUP:
+	  return c + cce->delta;
+	case TOBOTH:
+	  return c - 1;
+	case TO1:
+	  switch (cce->delta)
+	    {
+	      case EVENCAP:
+		if (c & 1)
+		  return c - 1;
+		break;
+	      case ODDCAP:
+		if (!(c & 1))
+		  return c - 1;
+		break;
+	      default:
+		break;
+	    }
 	default:
 	  break;
       }
+
   return c;
 }
 
-- 
2.16.2



More information about the Newlib mailing list