[PATCH 4/6] generated character data for libc/ctype
Thomas Wolff
towo@towo.net
Fri Mar 23 19:28:00 GMT 2018
Am 13.03.2018 um 22:41 schrieb Thomas Wolff:
> Am 13.03.2018 um 22:10 schrieb Corinna Vinschen:
>> On Mar 8 00:21, Thomas Wolff wrote:
>>> From 58a9cfcb253165d7073a9ed25e143daa2e979c10 Mon Sep 17 00:00:00 2001
>>> From: Thomas Wolff <towo@towo.net>
>>> Date: Sun, 25 Feb 2018 17:22:34 +0100
>>> Subject: [PATCH 4/6] use generated character data
>>>
>>> ---
>>>  newlib/libc/ctype/towctrans_l.c |  97 +++++++-
>>> ...
>> ...
>>
>> I pushed a patch.
>>
>>
>> Corinna
>>
> Thanks a lot for hot-fixing this. I'll meditate how this could slip
> through...
> And I'll also check why this wasn't discovered during my extensive
> testing.
Thanks again for helping to get this accomplished.
While meditating, I noticed that the bit packing of the case conversion
entries could use some documentation.
The attached patch adds that (and some tweaking for consistent
indentation); no code changes.
Thomas
-------------- next part --------------
From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Fri, 23 Mar 2018 20:07:22 +0100
Subject: [PATCH] comments to document struct caseconv_entry
explain design of compact (packed) struct caseconv_entry,
in case it needs to be modified for future Unicode versions;
indentation tweaks
---
newlib/libc/ctype/towctrans_l.c | 108 +++++++++++++++++++++++++---------------
1 file changed, 69 insertions(+), 39 deletions(-)
diff --git a/newlib/libc/ctype/towctrans_l.c b/newlib/libc/ctype/towctrans_l.c
index eaabd8c..0f210e0 100644
--- a/newlib/libc/ctype/towctrans_l.c
+++ b/newlib/libc/ctype/towctrans_l.c
@@ -4,8 +4,36 @@
//#include <errno.h>
#include "local.h"
-enum {EVENCAP, ODDCAP};
+/*
+ struct caseconv_entry describes the case conversion behaviour
+ of a range of Unicode characters.
+ It was designed to be compact for a minimal table size.
+ The range is first...first + diff.
+ Conversion behaviour for a character c in the respective range:
+ mode == TOLO towlower (c) = c + delta
+ mode == TOUP towupper (c) = c + delta
+ mode == TOBOTH (titling case characters)
+ towlower (c) = c + 1
+ towupper (c) = c - 1
+ mode == TO1 capital/small letters are alternating
+ delta == EVENCAP even codes are capital
+ delta == ODDCAP odd codes are capital
+ (this correlates with an even/odd first range value
+ as of Unicode 10.0 but we do not rely on this)
+ As of Unicode 10.0, the following field lengths are sufficient
+ first: 17 bits
+ diff: 8 bits
+ delta: 17 bits
+ mode: 2 bits
+ The reserve of 4 bits (to limit the struct to 6 bytes)
+ is currently added to the 'first' field;
+ should a future Unicode version make it necessary to expand the others,
+ the 'first' field could be reduced as needed, or larger ranges could
+ be split up (reduce limit max=255 e.g. to max=127 or max=63 in
+ script mkcaseconv, check increasing table size).
+ */
enum {TO1, TOLO, TOUP, TOBOTH};
+enum {EVENCAP, ODDCAP};
static struct caseconv_entry {
unsigned int first: 21;
unsigned short diff: 8;
@@ -45,32 +73,33 @@ toulower (wint_t c)
{
const struct caseconv_entry * cce =
bisearch(c, caseconv_table,
- sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+ sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
if (cce)
switch (cce->mode)
{
- case TOLO:
- return c + cce->delta;
- case TOBOTH:
- return c + 1;
- case TO1:
- switch (cce->delta)
- {
- case EVENCAP:
- if (!(c & 1))
- return c + 1;
- break;
- case ODDCAP:
- if (c & 1)
- return c + 1;
- break;
- default:
- break;
- }
+ case TOLO:
+ return c + cce->delta;
+ case TOBOTH:
+ return c + 1;
+ case TO1:
+ switch (cce->delta)
+ {
+ case EVENCAP:
+ if (!(c & 1))
+ return c + 1;
+ break;
+ case ODDCAP:
+ if (c & 1)
+ return c + 1;
+ break;
+ default:
+ break;
+ }
default:
break;
}
+
return c;
}
@@ -79,32 +108,33 @@ touupper (wint_t c)
{
const struct caseconv_entry * cce =
bisearch(c, caseconv_table,
- sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+ sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
if (cce)
switch (cce->mode)
{
- case TOUP:
- return c + cce->delta;
- case TOBOTH:
- return c - 1;
- case TO1:
- switch (cce->delta)
- {
- case EVENCAP:
- if (c & 1)
- return c - 1;
- break;
- case ODDCAP:
- if (!(c & 1))
- return c - 1;
- break;
- default:
- break;
- }
+ case TOUP:
+ return c + cce->delta;
+ case TOBOTH:
+ return c - 1;
+ case TO1:
+ switch (cce->delta)
+ {
+ case EVENCAP:
+ if (c & 1)
+ return c - 1;
+ break;
+ case ODDCAP:
+ if (!(c & 1))
+ return c - 1;
+ break;
+ default:
+ break;
+ }
default:
break;
}
+
return c;
}
--
2.16.2
More information about the Newlib mailing list