From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001 From: Thomas Wolff Date: Fri, 23 Mar 2018 20:07:22 +0100 Subject: [PATCH] comments to document struct caseconv_entry explain design of compact (packed) struct caseconv_entry, in case it needs to be modified for future Unicode versions --- newlib/libc/ctype/towctrans_l.c | 108 +++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 1 deletions(-) diff --git a/newlib/libc/ctype/towctrans_l.c b/newlib/libc/ctype/towctrans_l.c index eaabd8c..0f210e0 100644 --- a/newlib/libc/ctype/towctrans_l.c +++ b/newlib/libc/ctype/towctrans_l.c @@ -4,8 +4,36 @@ //#include #include "local.h" -enum {EVENCAP, ODDCAP}; +/* + struct caseconv_entry describes the case conversion behaviour + of a range of Unicode characters. + It was designed to be compact for a minimal table size. + The range is first...first + diff. + Conversion behaviour for a character c in the respective range: + mode == TOLO towlower (c) = c + delta + mode == TOUP towupper (c) = c + delta + mode == TOBOTH (titling case characters) + towlower (c) = c + 1 + towupper (c) = c - 1 + mode == TO1 capital/small letters are alternating + delta == EVENCAP even codes are capital + delta == ODDCAP odd codes are capital + (this correlates with an even/odd first range value + as of Unicode 10.0 but we do not rely on this) + As of Unicode 10.0, the following field lengths are sufficient + first: 17 bits + diff: 8 bits + delta: 17 bits + mode: 2 bits + The reserve of 4 bits (to limit the struct to 6 bytes) + is currently added to the 'first' field; + should a future Unicode version make it necessary to expand the others, + the 'first' field could be reduced as needed, or larger ranges could + be split up (reduce limit max=255 e.g. to max=127 or max=63 in + script mkcaseconv, check increasing table size). + */ enum {TO1, TOLO, TOUP, TOBOTH}; +enum {EVENCAP, ODDCAP}; static struct caseconv_entry { unsigned int first: 21; unsigned short diff: 8; @@ -45,32 +73,33 @@ toulower (wint_t c) -- 2.16.2