width data generation

author Thomas Wolff <towo@towo.net>

Wed, 7 Mar 2018 22:55:52 +0000 (23:55 +0100)

committer Corinna Vinschen <corinna@vinschen.de>

Mon, 12 Mar 2018 09:17:20 +0000 (10:17 +0100)
author Thomas Wolff <towo@towo.net>
Wed, 7 Mar 2018 22:55:52 +0000 (23:55 +0100)
committer Corinna Vinschen <corinna@vinschen.de>
Mon, 12 Mar 2018 09:17:20 +0000 (10:17 +0100)
diff --git a/newlib/libc/string/WIDTH-A b/newlib/libc/string/WIDTH-A

new file mode 100644 (file)

index 0000000..51e8f23
--- /dev/null
+++ b/newlib/libc/string/WIDTH-A
@@ -0,0 +1,569 @@
+# UAX #11: East Asian Ambiguous
+
+# Plane 00
+# Rows Positions (Cells)
+
+  00   A1 A4 A7-A8 AA AD-AE B0-B4 B6-BA BC-BF C6 D0 D7-D8 DE-E1 E6 E8-EA
+  00   EC-ED F0 F2-F3 F7-FA FC FE
+  01   01 11 13 1B 26-27 2B 31-33 38 3F-42 44 48-4B 4D 52-53 66-67 6B
+  01   CE D0 D2 D4 D6 D8 DA DC
+  02   51 61 C4 C7 C9-CB CD D0 D8-DB DD DF
+  03   00-6F 91-A1 A3-A9 B1-C1 C3-C9
+  04   01 10-4F 51
+  20   10 13-16 18-19 1C-1D 20-22 24-27 30 32-33 35 3B 3E 74 7F 81-84
+  20   AC
+  21   03 05 09 13 16 21-22 26 2B 53-54 5B-5E 60-6B 70-79 89 90-99 B8-B9
+  21   D2 D4 E7
+  22   00 02-03 07-08 0B 0F 11 15 1A 1D-20 23 25 27-2C 2E 34-37 3C-3D
+  22   48 4C 52 60-61 64-67 6A-6B 6E-6F 82-83 86-87 95 99 A5 BF
+  23   12
+  24   60-E9 EB-FF
+  25   00-4B 50-73 80-8F 92-95 A0-A1 A3-A9 B2-B3 B6-B7 BC-BD C0-C1 C6-C8
+  25   CB CE-D1 E2-E5 EF
+  26   05-06 09 0E-0F 1C 1E 40 42 60-61 63-65 67-6A 6C-6D 6F 9E-9F BF
+  26   C6-CD CF-D3 D5-E1 E3 E8-E9 EB-F1 F4 F6-F9 FB-FC FE-FF
+  27   3D 76-7F
+  2B   56-59
+  32   48-4F
+  E0   00-FF
+  E1   00-FF
+  E2   00-FF
+  E3   00-FF
+  E4   00-FF
+  E5   00-FF
+  E6   00-FF
+  E7   00-FF
+  E8   00-FF
+  E9   00-FF
+  EA   00-FF
+  EB   00-FF
+  EC   00-FF
+  ED   00-FF
+  EE   00-FF
+  EF   00-FF
+  F0   00-FF
+  F1   00-FF
+  F2   00-FF
+  F3   00-FF
+  F4   00-FF
+  F5   00-FF
+  F6   00-FF
+  F7   00-FF
+  F8   00-FF
+  FE   00-0F
+  FF   FD
+  1F1  00-0A 10-2D 30-69 70-8D 8F-90 9B-AC
+  E01  00-EF
+  F00  00-FF
+  F01  00-FF
+  F02  00-FF
+  F03  00-FF
+  F04  00-FF
+  F05  00-FF
+  F06  00-FF
+  F07  00-FF
+  F08  00-FF
+  F09  00-FF
+  F0A  00-FF
+  F0B  00-FF
+  F0C  00-FF
+  F0D  00-FF
+  F0E  00-FF
+  F0F  00-FF
+  F10  00-FF
+  F11  00-FF
+  F12  00-FF
+  F13  00-FF
+  F14  00-FF
+  F15  00-FF
+  F16  00-FF
+  F17  00-FF
+  F18  00-FF
+  F19  00-FF
+  F1A  00-FF
+  F1B  00-FF
+  F1C  00-FF
+  F1D  00-FF
+  F1E  00-FF
+  F1F  00-FF
+  F20  00-FF
+  F21  00-FF
+  F22  00-FF
+  F23  00-FF
+  F24  00-FF
+  F25  00-FF
+  F26  00-FF
+  F27  00-FF
+  F28  00-FF
+  F29  00-FF
+  F2A  00-FF
+  F2B  00-FF
+  F2C  00-FF
+  F2D  00-FF
+  F2E  00-FF
+  F2F  00-FF
+  F30  00-FF
+  F31  00-FF
+  F32  00-FF
+  F33  00-FF
+  F34  00-FF
+  F35  00-FF
+  F36  00-FF
+  F37  00-FF
+  F38  00-FF
+  F39  00-FF
+  F3A  00-FF
+  F3B  00-FF
+  F3C  00-FF
+  F3D  00-FF
+  F3E  00-FF
+  F3F  00-FF
+  F40  00-FF
+  F41  00-FF
+  F42  00-FF
+  F43  00-FF
+  F44  00-FF
+  F45  00-FF
+  F46  00-FF
+  F47  00-FF
+  F48  00-FF
+  F49  00-FF
+  F4A  00-FF
+  F4B  00-FF
+  F4C  00-FF
+  F4D  00-FF
+  F4E  00-FF
+  F4F  00-FF
+  F50  00-FF
+  F51  00-FF
+  F52  00-FF
+  F53  00-FF
+  F54  00-FF
+  F55  00-FF
+  F56  00-FF
+  F57  00-FF
+  F58  00-FF
+  F59  00-FF
+  F5A  00-FF
+  F5B  00-FF
+  F5C  00-FF
+  F5D  00-FF
+  F5E  00-FF
+  F5F  00-FF
+  F60  00-FF
+  F61  00-FF
+  F62  00-FF
+  F63  00-FF
+  F64  00-FF
+  F65  00-FF
+  F66  00-FF
+  F67  00-FF
+  F68  00-FF
+  F69  00-FF
+  F6A  00-FF
+  F6B  00-FF
+  F6C  00-FF
+  F6D  00-FF
+  F6E  00-FF
+  F6F  00-FF
+  F70  00-FF
+  F71  00-FF
+  F72  00-FF
+  F73  00-FF
+  F74  00-FF
+  F75  00-FF
+  F76  00-FF
+  F77  00-FF
+  F78  00-FF
+  F79  00-FF
+  F7A  00-FF
+  F7B  00-FF
+  F7C  00-FF
+  F7D  00-FF
+  F7E  00-FF
+  F7F  00-FF
+  F80  00-FF
+  F81  00-FF
+  F82  00-FF
+  F83  00-FF
+  F84  00-FF
+  F85  00-FF
+  F86  00-FF
+  F87  00-FF
+  F88  00-FF
+  F89  00-FF
+  F8A  00-FF
+  F8B  00-FF
+  F8C  00-FF
+  F8D  00-FF
+  F8E  00-FF
+  F8F  00-FF
+  F90  00-FF
+  F91  00-FF
+  F92  00-FF
+  F93  00-FF
+  F94  00-FF
+  F95  00-FF
+  F96  00-FF
+  F97  00-FF
+  F98  00-FF
+  F99  00-FF
+  F9A  00-FF
+  F9B  00-FF
+  F9C  00-FF
+  F9D  00-FF
+  F9E  00-FF
+  F9F  00-FF
+  FA0  00-FF
+  FA1  00-FF
+  FA2  00-FF
+  FA3  00-FF
+  FA4  00-FF
+  FA5  00-FF
+  FA6  00-FF
+  FA7  00-FF
+  FA8  00-FF
+  FA9  00-FF
+  FAA  00-FF
+  FAB  00-FF
+  FAC  00-FF
+  FAD  00-FF
+  FAE  00-FF
+  FAF  00-FF
+  FB0  00-FF
+  FB1  00-FF
+  FB2  00-FF
+  FB3  00-FF
+  FB4  00-FF
+  FB5  00-FF
+  FB6  00-FF
+  FB7  00-FF
+  FB8  00-FF
+  FB9  00-FF
+  FBA  00-FF
+  FBB  00-FF
+  FBC  00-FF
+  FBD  00-FF
+  FBE  00-FF
+  FBF  00-FF
+  FC0  00-FF
+  FC1  00-FF
+  FC2  00-FF
+  FC3  00-FF
+  FC4  00-FF
+  FC5  00-FF
+  FC6  00-FF
+  FC7  00-FF
+  FC8  00-FF
+  FC9  00-FF
+  FCA  00-FF
+  FCB  00-FF
+  FCC  00-FF
+  FCD  00-FF
+  FCE  00-FF
+  FCF  00-FF
+  FD0  00-FF
+  FD1  00-FF
+  FD2  00-FF
+  FD3  00-FF
+  FD4  00-FF
+  FD5  00-FF
+  FD6  00-FF
+  FD7  00-FF
+  FD8  00-FF
+  FD9  00-FF
+  FDA  00-FF
+  FDB  00-FF
+  FDC  00-FF
+  FDD  00-FF
+  FDE  00-FF
+  FDF  00-FF
+  FE0  00-FF
+  FE1  00-FF
+  FE2  00-FF
+  FE3  00-FF
+  FE4  00-FF
+  FE5  00-FF
+  FE6  00-FF
+  FE7  00-FF
+  FE8  00-FF
+  FE9  00-FF
+  FEA  00-FF
+  FEB  00-FF
+  FEC  00-FF
+  FED  00-FF
+  FEE  00-FF
+  FEF  00-FF
+  FF0  00-FF
+  FF1  00-FF
+  FF2  00-FF
+  FF3  00-FF
+  FF4  00-FF
+  FF5  00-FF
+  FF6  00-FF
+  FF7  00-FF
+  FF8  00-FF
+  FF9  00-FF
+  FFA  00-FF
+  FFB  00-FF
+  FFC  00-FF
+  FFD  00-FF
+  FFE  00-FF
+  FFF  00-FD
+  1000 00-FF
+  1001 00-FF
+  1002 00-FF
+  1003 00-FF
+  1004 00-FF
+  1005 00-FF
+  1006 00-FF
+  1007 00-FF
+  1008 00-FF
+  1009 00-FF
+  100A 00-FF
+  100B 00-FF
+  100C 00-FF
+  100D 00-FF
+  100E 00-FF
+  100F 00-FF
+  1010 00-FF
+  1011 00-FF
+  1012 00-FF
+  1013 00-FF
+  1014 00-FF
+  1015 00-FF
+  1016 00-FF
+  1017 00-FF
+  1018 00-FF
+  1019 00-FF
+  101A 00-FF
+  101B 00-FF
+  101C 00-FF
+  101D 00-FF
+  101E 00-FF
+  101F 00-FF
+  1020 00-FF
+  1021 00-FF
+  1022 00-FF
+  1023 00-FF
+  1024 00-FF
+  1025 00-FF
+  1026 00-FF
+  1027 00-FF
+  1028 00-FF
+  1029 00-FF
+  102A 00-FF
+  102B 00-FF
+  102C 00-FF
+  102D 00-FF
+  102E 00-FF
+  102F 00-FF
+  1030 00-FF
+  1031 00-FF
+  1032 00-FF
+  1033 00-FF
+  1034 00-FF
+  1035 00-FF
+  1036 00-FF
+  1037 00-FF
+  1038 00-FF
+  1039 00-FF
+  103A 00-FF
+  103B 00-FF
+  103C 00-FF
+  103D 00-FF
+  103E 00-FF
+  103F 00-FF
+  1040 00-FF
+  1041 00-FF
+  1042 00-FF
+  1043 00-FF
+  1044 00-FF
+  1045 00-FF
+  1046 00-FF
+  1047 00-FF
+  1048 00-FF
+  1049 00-FF
+  104A 00-FF
+  104B 00-FF
+  104C 00-FF
+  104D 00-FF
+  104E 00-FF
+  104F 00-FF
+  1050 00-FF
+  1051 00-FF
+  1052 00-FF
+  1053 00-FF
+  1054 00-FF
+  1055 00-FF
+  1056 00-FF
+  1057 00-FF
+  1058 00-FF
+  1059 00-FF
+  105A 00-FF
+  105B 00-FF
+  105C 00-FF
+  105D 00-FF
+  105E 00-FF
+  105F 00-FF
+  1060 00-FF
+  1061 00-FF
+  1062 00-FF
+  1063 00-FF
+  1064 00-FF
+  1065 00-FF
+  1066 00-FF
+  1067 00-FF
+  1068 00-FF
+  1069 00-FF
+  106A 00-FF
+  106B 00-FF
+  106C 00-FF
+  106D 00-FF
+  106E 00-FF
+  106F 00-FF
+  1070 00-FF
+  1071 00-FF
+  1072 00-FF
+  1073 00-FF
+  1074 00-FF
+  1075 00-FF
+  1076 00-FF
+  1077 00-FF
+  1078 00-FF
+  1079 00-FF
+  107A 00-FF
+  107B 00-FF
+  107C 00-FF
+  107D 00-FF
+  107E 00-FF
+  107F 00-FF
+  1080 00-FF
+  1081 00-FF
+  1082 00-FF
+  1083 00-FF
+  1084 00-FF
+  1085 00-FF
+  1086 00-FF
+  1087 00-FF
+  1088 00-FF
+  1089 00-FF
+  108A 00-FF
+  108B 00-FF
+  108C 00-FF
+  108D 00-FF
+  108E 00-FF
+  108F 00-FF
+  1090 00-FF
+  1091 00-FF
+  1092 00-FF
+  1093 00-FF
+  1094 00-FF
+  1095 00-FF
+  1096 00-FF
+  1097 00-FF
+  1098 00-FF
+  1099 00-FF
+  109A 00-FF
+  109B 00-FF
+  109C 00-FF
+  109D 00-FF
+  109E 00-FF
+  109F 00-FF
+  10A0 00-FF
+  10A1 00-FF
+  10A2 00-FF
+  10A3 00-FF
+  10A4 00-FF
+  10A5 00-FF
+  10A6 00-FF
+  10A7 00-FF
+  10A8 00-FF
+  10A9 00-FF
+  10AA 00-FF
+  10AB 00-FF
+  10AC 00-FF
+  10AD 00-FF
+  10AE 00-FF
+  10AF 00-FF
+  10B0 00-FF
+  10B1 00-FF
+  10B2 00-FF
+  10B3 00-FF
+  10B4 00-FF
+  10B5 00-FF
+  10B6 00-FF
+  10B7 00-FF
+  10B8 00-FF
+  10B9 00-FF
+  10BA 00-FF
+  10BB 00-FF
+  10BC 00-FF
+  10BD 00-FF
+  10BE 00-FF
+  10BF 00-FF
+  10C0 00-FF
+  10C1 00-FF
+  10C2 00-FF
+  10C3 00-FF
+  10C4 00-FF
+  10C5 00-FF
+  10C6 00-FF
+  10C7 00-FF
+  10C8 00-FF
+  10C9 00-FF
+  10CA 00-FF
+  10CB 00-FF
+  10CC 00-FF
+  10CD 00-FF
+  10CE 00-FF
+  10CF 00-FF
+  10D0 00-FF
+  10D1 00-FF
+  10D2 00-FF
+  10D3 00-FF
+  10D4 00-FF
+  10D5 00-FF
+  10D6 00-FF
+  10D7 00-FF
+  10D8 00-FF
+  10D9 00-FF
+  10DA 00-FF
+  10DB 00-FF
+  10DC 00-FF
+  10DD 00-FF
+  10DE 00-FF
+  10DF 00-FF
+  10E0 00-FF
+  10E1 00-FF
+  10E2 00-FF
+  10E3 00-FF
+  10E4 00-FF
+  10E5 00-FF
+  10E6 00-FF
+  10E7 00-FF
+  10E8 00-FF
+  10E9 00-FF
+  10EA 00-FF
+  10EB 00-FF
+  10EC 00-FF
+  10ED 00-FF
+  10EE 00-FF
+  10EF 00-FF
+  10F0 00-FF
+  10F1 00-FF
+  10F2 00-FF
+  10F3 00-FF
+  10F4 00-FF
+  10F5 00-FF
+  10F6 00-FF
+  10F7 00-FF
+  10F8 00-FF
+  10F9 00-FF
+  10FA 00-FF
+  10FB 00-FF
+  10FC 00-FF
+  10FD 00-FF
+  10FE 00-FF
+  10FF 00-FD
+
diff --git a/newlib/libc/string/mkunidata b/newlib/libc/string/mkunidata

new file mode 100755 (executable)

index 0000000..c0bf5de
--- /dev/null
+++ b/newlib/libc/string/mkunidata
@@ -0,0 +1,54 @@
+#! /bin/sh
+
+echo generating Unicode width data for newlib/libc/string/wcwidth.c
+
+cd `dirname $0`
+PATH="$PATH":. # ensure access to uniset tool
+
+#############################################################################
+# checks and (with option -u) downloads
+
+case "$1" in
+-u)
+       #WGET=wget -N -t 1 --timeout=55
+       WGET=curl -R -O --connect-timeout 55
+       WGET+=-z $@
+
+       echo downloading uniset tool
+       $WGET http://www.cl.cam.ac.uk/~mgk25/download/uniset.tar.gz
+       gzip -dc uniset.tar.gz | tar xvf - uniset
+
+       echo downloading data from unicode.org
+       for data in UnicodeData.txt Blocks.txt EastAsianWidth.txt
+       do      $WGET http://unicode.org/Public/UNIDATA/$data
+       done
+       ;;
+*)     echo checking package unicode-ucd
+       grep unicode-ucd /etc/setup/installed.db || exit 9
+       ;;
+esac
+
+echo checking uniset tool
+type uniset || exit 9
+
+for data in UnicodeData.txt Blocks.txt EastAsianWidth.txt
+do     test -r $data || ln -s /usr/share/unicode/ucd/$data . || exit 9
+done
+
+echo generating from Unicode version `sed -e 's,[^.0-9],,g' -e 1q Blocks.txt`
+exit
+
+#############################################################################
+# table generation
+
+echo generating combining characters table
+uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B +D7B0-D7C6 +D7CB-D7FB c > combining.t
+
+echo generating ambiguous width characters table
+sh ./mkwidthA && uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c > ambiguous.t
+
+echo generating wide characters table
+sh ./mkwide
+
+#############################################################################
+# end
diff --git a/newlib/libc/string/mkwide b/newlib/libc/string/mkwide

new file mode 100755 (executable)

index 0000000..55a0bab
--- /dev/null
+++ b/newlib/libc/string/mkwide
@@ -0,0 +1,49 @@
+#! /bin/sh
+
+# generate list of wide characters, with convex closure
+
+skipcheck=false
+
+if [ ! -r EastAsianWidth.txt ]
+then   ln -s /usr/share/unicode/ucd/EastAsianWidth.txt . || exit 1
+fi
+if [ ! -r UnicodeData.txt ]
+then   ln -s /usr/share/unicode/ucd/UnicodeData.txt . || exit 1
+fi
+if [ ! -r Blocks.txt ]
+then   ln -s /usr/share/unicode/ucd/Blocks.txt . || exit 1
+fi
+
+sed -e "s,^\([^;]*\);[NAH],\1," -e t -e d EastAsianWidth.txt > wide.na
+sed -e "s,^\([^;]*\);[WF],\1," -e t -e d EastAsianWidth.txt > wide.fw
+
+PATH="$PATH:." # for uniset
+
+nrfw=`uniset +wide.fw nr | sed -e 's,.*:,,'`
+echo FW $nrfw
+nrna=`uniset +wide.na nr | sed -e 's,.*:,,'`
+echo NAH $nrna
+
+extrablocks="2E80-303E"
+
+# check all blocks
+includes () {
+       nr=`uniset +wide.$2 -$1 nr | sed -e 's,.*:,,'`
+       test $nr != $3
+}
+echo "adding compact closure of wide ranges, this may take ~10min"
+for b in $extrablocks `sed -e 's,^\([0-9A-F]*\)\.\.\([0-9A-F]*\).*,\1-\2,' -e t -e d Blocks.txt`
+do     range=$b
+       echo checking $range $* >&2
+       if includes $range fw $nrfw && ! includes $range na $nrna
+       then    echo $range
+       fi
+done > wide.blocks
+
+(
+sed -e "s,^,//," -e 1q EastAsianWidth.txt
+sed -e "s,^,//," -e 1q Blocks.txt
+uniset `sed -e 's,^,+,' wide.blocks` +wide.fw c
+) > wide.t
+
+rm -f wide.na wide.fw wide.blocks
diff --git a/newlib/libc/string/mkwidthA b/newlib/libc/string/mkwidthA

new file mode 100755 (executable)

index 0000000..343ab40
--- /dev/null
+++ b/newlib/libc/string/mkwidthA
@@ -0,0 +1,20 @@
+#! /bin/sh
+
+# generate WIDTH-A file, listing Unicode characters with width property
+# Ambiguous, from EastAsianWidth.txt
+
+if [ ! -r EastAsianWidth.txt ]
+then   ln -s /usr/share/unicode/ucd/EastAsianWidth.txt . || exit 1
+fi
+if [ ! -r UnicodeData.txt ]
+then   ln -s /usr/share/unicode/ucd/UnicodeData.txt . || exit 1
+fi
+if [ ! -r Blocks.txt ]
+then   ln -s /usr/share/unicode/ucd/Blocks.txt . || exit 1
+fi
+
+sed -e "s,^\([^;]*\);A,\1," -e t -e d EastAsianWidth.txt > width-a-new
+rm -f WIDTH-A
+echo "# UAX #11: East Asian Ambiguous" > WIDTH-A
+PATH="$PATH:." uniset +width-a-new compact >> WIDTH-A
+rm -f width-a-new
diff --git a/newlib/libc/string/uniset b/newlib/libc/string/uniset

new file mode 100755 (executable)

index 0000000..85d3b2a
--- /dev/null
+++ b/newlib/libc/string/uniset
@@ -0,0 +1,696 @@
+#!/usr/bin/perl
+# Uniset -- Unicode subset manager -- Markus Kuhn
+# http://www.cl.cam.ac.uk/~mgk25/download/uniset.tar.gz
+
+require 5.008;
+use open ':utf8';
+use FindBin qw($RealBin);  # to find directory where this file is located
+
+binmode(STDOUT, ":utf8");
+binmode(STDIN, ":utf8");
+
+my (%name, %invname, %category, %comment);
+
+# Called without any argument: print the usage text and leave with status 1.
+unless (@ARGV) { print <<End; exit 1 }
+Uniset -- Unicode subset manager -- Markus Kuhn
+
+Uniset merges and subtracts Unicode subsets. It can output and
+analyse the resulting character set in various formats.
+
+Uniset understand the following command-line arguments:
+
+Commands to define a set of characters:
+
+  + filename   add the character set described in the file to the set
+  - filename   remove the character set described in the file from the set
+  +: filename  add the characters in the UTF-8 file to the set
+  -: filename  remove the characters in the UTF-8 file from the set
+  +xxxx..yyyy  add the range to the set (xxxx and yyyy are hex numbers)
+  -xxxx..yyyy  remove the range from the set (xxxx and yyyy are hex numbers)
+  +cat=Xx      add all Unicode characters with category code Xx
+  -cat=Xx      remove all Unicode characters with category code Xx
+  -cat!=Xx     remove all Unicode characters without category code Xx
+  clean        remove any elements that do not appear in the Unicode database
+  unknown      remove any elements that do appear in the Unicode database
+
+Command to output descriptions of the constructed set of characters:
+
+  table        write a full table with one line per character
+  compact      output the set in compact MES format
+  c            output the set as C interval array
+  nr           output the number of characters
+  sources      output a table that shows the number of characters contributed
+               by the various combinations of input sets added with +.
+  utf8-list    output a list of all characters encoded in UTF-8
+
+Commands to tailor the following output commands:
+
+  html         write HTML tables instead of plain text
+  ucs          add the unicode character itself to the table (UTF-8 in
+               plain table, numeric character reference in HTML)
+
+Formats of character set input files read by the + and - command:
+
+Empty lines, white space at the start and end of the line and any
+comment text following a \# are ignored. The following formats are
+recognized
+
+xx yyyy             xx is the hex code in an 8-bit character set and yyyy
+                    is the corresponding Unicode value. Both can optionally
+                    be prefixed by 0x. This is the format used in the
+                    files on <ftp://ftp.unicode.org/Public/MAPPINGS/>.
+
+yyyy                yyyy (optionally prefixed with 0x) is a Unicode character
+                    belonging to the specified subset.
+
+yyyy-yyyy           a range of Unicode characters belonging to
+yyyy..yyyy          the specified subset.
+
+xx yy yy yy-yy yy   xx denotes a row (high-byte) and the yy specify
+                    corresponding low bytes or with a hyphen also ranges of
+                    low bytes in the Unicode values that belong to this
+                    subset. This is also the format that is generated by
+                    the compact command.
+End
+
+
+# Subroutine telling whether ISO 10646/Unicode code point $_[0] is East Asian
+# Wide (W) or East Asian FullWidth (F) as per Unicode Technical Report #11.
+
+sub iswide ($) {
+    my ($cp) = @_;
+    return !1 if $cp < 0x1100 || $cp == 0x303f;  # 303F: hole in CJK .. Yi
+    return !0 if $cp == 0x2329 || $cp == 0x232a;
+    for my $span ([0x1100, 0x115f],              # Hangul Jamo
+                  [0x2e80, 0xa4cf],              # CJK .. Yi
+                  [0xac00, 0xd7a3],              # Hangul Syllables
+                  [0xf900, 0xfaff],              # CJK Comp. Ideographs
+                  [0xfe30, 0xfe6f],              # CJK Comp. Forms
+                  [0xff00, 0xff60],              # Fullwidth Forms
+                  [0xffe0, 0xffe6],
+                  [0x20000, 0x2fffd],
+                  [0x30000, 0x3fffd]) {
+        return !0 if $cp >= $span->[0] && $cp <= $span->[1];
+    }
+    return !1;
+}
+
+# Jamo short names, see Unicode 3.0, table 4-4, page 86
+
+my @lname = ('G', 'GG', 'N', 'D', 'DD', 'R', 'M', 'B', 'BB', 'S', 'SS', '',
+            'J', 'JJ', 'C', 'K', 'T', 'P', 'H'); # 1100..1112
+my @vname = ('A', 'AE', 'YA', 'YAE', 'EO', 'E', 'YEO', 'YE', 'O',
+            'WA', 'WAE', 'OE', 'YO', 'U', 'WEO', 'WE', 'WI', 'YU',
+            'EU', 'YI', 'I'); # 1161..1175
+my @tname = ('G', 'GG', 'GS', 'N', 'NJ', 'NH', 'D', 'L', 'LG', 'LM',
+            'LB', 'LS', 'LT', 'LP', 'LH', 'M', 'B', 'BS', 'S', 'SS',
+            'NG', 'J', 'C', 'K', 'T', 'P', 'H'); # 11a8..11c2
+
+# Return the Unicode name that belongs to a given character code: computed
+# for CJK unified ideographs and Hangul syllables, else taken from %name
+# (undef when the code has no entry there).
+sub name {
+    my $ucs = shift(@_);
+
+    # The intervals used here reflect Unicode Version 3.2
+    if (($ucs >=  0x3400 && $ucs <=  0x4db5) ||
+        ($ucs >=  0x4e00 && $ucs <=  0x9fa5) ||
+        ($ucs >= 0x20000 && $ucs <= 0x2a6d6)) {
+        return "CJK UNIFIED IDEOGRAPH-" . sprintf("%04X", $ucs);
+    }
+
+    if ($ucs >= 0xac00 && $ucs <= 0xd7a3) {
+        # syllable index = (lead * 21 + vowel) * 28 + trail, where trail 0
+        # means "no trailing consonant" and 1..27 select $tname[0..26]
+        my $s = $ucs - 0xac00;
+        my $t = $s % 28;
+        return "HANGUL SYLLABLE " .
+            ($lname[int($s / (21 * 28))] .
+             $vname[int(($s % (21 * 28)) / 28)] .
+             ($t ? $tname[$t - 1] : ''));  # $tname[-1] would append a bogus 'H'
+    }
+
+    return $name{$ucs};
+}
+
+sub is_unicode {
+    my ($cp) = @_;
+
+    # Ranges accepted wholesale, without consulting %name; the
+    # intervals used here reflect Unicode Version 3.2
+    for my $span ([0x3400, 0x4db5], [0x4e00, 0x9fa5],
+                  [0xac00, 0xd7a3], [0x20000, 0x2a6d6]) {
+        return 1 if $span->[0] <= $cp && $cp <= $span->[1];
+    }
+
+    # anything else counts only if it has an entry in %name
+    return exists $name{$cp};
+}
+
+# Directories searched, in this order, for files that cannot be opened as given
+my @search_path =
+    grep { -d $_ } "$ENV{HOME}/local/share/uniset", "/usr/share/uniset";
+push @search_path, $RealBin unless $RealBin =~ m|^/usr/bin|;
+
+# Open $fn in mode $mode, as given or else (for names without '/') in each
+# @search_path directory; returns the handle, or undef on failure.
+sub search_open {
+    my ($mode, $fn) = @_;
+    my $fh;
+    open($fh, $mode, $fn) and return $fh;
+    return undef if index($fn, '/') >= 0;
+    foreach my $dir (@search_path) {
+        open($fh, $mode, "$dir/$fn") and return $fh;
+    }
+    return undef;
+}
+
+my $html = 0;     # switched on by the "html" and "img" arguments
+my $image = 0;    # switched on by the "img" argument
+my $adducs = 0;   # switched on by the "ucs" argument
+my $unicodedata = "UnicodeData.txt";
+my $blockdata = "Blocks.txt";
+
+# read list of all Unicode names: fills %name, %invname, %category and %comment
+my $data = search_open('<', $unicodedata);
+unless ($data) {
+    die ("Can't open Unicode database '$unicodedata':\n$!\n\n" .
+        "Please make sure that you have downloaded the file\n" .
+        "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n");
+}
+while (<$data>) {
+    if (/^([0-9,A-F]{4,8});([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*)$/) { # 15 fields; NOTE(review): the ',' in the hex class looks unintended
+       next if $2 ne '<control>' && substr($2, 0, 1) eq '<'; # skip <...> names other than <control>
+       $ucs = hex($1); # note: $ucs is a package global, not a lexical
+        $name{$ucs} = $2;
+       $invname{$2} = $ucs; # reverse lookup, name -> code
+       $category{$ucs} = $3;
+        $comment{$ucs} = $12; # 12th field
+    } else {
+        die("Syntax error in line '$_' in file '$unicodedata'");
+    }
+}
+close($data);
+
+# read list of all Unicode blocks into @blockstart, @blockend and @blockname
+$data = search_open('<', $blockdata);
+unless ($data) {
+    die ("Can't open Unicode blockname list '$blockdata':\n$!\n\n" .
+        "Please make sure that you have downloaded the file\n" .
+        "http://www.unicode.org/Public/UNIDATA/Blocks.txt\n");
+}
+my $blocks = 0; # number of blocks stored so far, i.e. the next free index
+my (@blockstart, @blockend, @blockname);
+while (<$data>) {
+    if (/^\s*([0-9,A-F]{4,8})\s*\.\.\s*([0-9,A-F]{4,8})\s*;\s*(.*)$/) { # "start..end; name"
+        $blockstart[$blocks] = hex($1);
+       $blockend  [$blocks] = hex($2);
+        $blockname [$blocks] = $3;
+       $blocks++;
+    } elsif (/^\s*\#/ || /^\s*$/) {
+       # ignore comments and empty lines
+    } else {
+        die("Syntax error in line '$_' in file '$blockdata'");
+    }
+}
+close($data);
+if ($blockend[$blocks-1] < 0x110000) { # last block read ends below 0x110000
+    $blockstart[$blocks] = 0x110000; # append a pseudo-block for everything above
+    $blockend  [$blocks] = 0x7FFFFFFF;
+    $blockname [$blocks] = "Beyond Plane 16";
+    $blocks++;
+}
+
+# process command line arguments
+while ($_ = shift(@ARGV)) {
+    if (/^html$/) {
+       $html = 1;
+    } elsif (/^ucs$/) {
+       $adducs = 1;
+    } elsif (/^img$/) {
+       $html = 1;
+       $image = 1;
+    } elsif (/^template$/) {
+       $template = shift(@ARGV);
+       open(TEMPLATE, $template) || die("Can't open template file '$template': '$!'");
+       while (<TEMPLATE>) {
+           if (/^\#\s*include\s+\"([^\"]*)\"\s*$/) {
+               open(INCLUDE, $1) || die("Can't open template include file '$1': '$!'");
+               while (<INCLUDE>) {
+                   print $_;
+               }
+               close(INCLUDE);
+           } elsif (/^\#\s*quote\s+\"([^\"]*)\"\s*$/) {
+               open(INCLUDE, $1) || die("Can't open template include file '$1': '$!'");
+               while (<INCLUDE>) {
+                   s/&/&amp;/g;
+                   s/</&lt;/g;
+                   print $_;
+               }
+               close(INCLUDE);
+           } else {
+               print $_;
+           }
+       }
+       close(TEMPLATE);
+    } elsif (/^\+cat=(.+)$/) {
+       # add characters with given category
+       $cat = $1;
+       for $i (keys(%category)) {
+           $used{$i} = "[${cat}]" if $category{$i} eq $cat;
+       }
+    } elsif (/^\-cat=(.+)$/) {
+       # remove characters with given category
+       $cat = $1;
+       for $i (keys(%category)) {
+           delete $used{$i} if $category{$i} eq $cat;
+       }
+    } elsif (/^\-cat!=(.+)$/) {
+       # remove characters without given category
+       $cat = $1;
+       for $i (keys(%category)) {
+           delete $used{$i} unless $category{$i} eq $cat;
+       }
+    } elsif (/^([+-]):(.*)/) {
+       $remove = $1 eq "-";
+       $setfile = $2;
+       $setfile = shift(@ARGV) if $setfile eq "";
+       push(@SETS, $setfile);
+       open(SET, $setfile) || die("Can't open set file '$setfile': '$!'");
+       $setname = $setfile;
+       while (<SET>) {
+           while ($_) {
+               $i = ord($_);
+               $used{$i} .= "[${setname}]" unless $remove;
+               delete $used{$i} if $remove;
+               $_ = substr($_, 1);
+           }
+       }
+       close SET;
+    } elsif (/^([+-])(.*)/) {
+       $remove = $1 eq "-";
+       $setfile = $2;
+       $setfile = "$setfile..$setfile" if $setfile =~ /^([0-9A-Fa-f]{4,8})$/;
+       if ($setfile =~ /^([0-9A-Fa-f]{4,8})(-|\.\.)([0-9A-Fa-f]{4,8})$/) {
+           # handle intervall specification on command line
+           $first = hex($1);
+           $last = hex($3);
+           for ($i = $first; $i <= $last; $i++) {
+               $used{$i} .= "[ARG]" unless $remove;
+               delete $used{$i} if $remove;
+           }
+           next;
+       }
+       $setfile = shift(@ARGV) if $setfile eq "";
+       push(@SETS, $setfile);
+       my $setf = search_open('<', $setfile);
+       die("Can't open set file '$setfile': '$!'") unless $setf;
+       $cedf = ($setfile =~ /cedf/); # detect Kosta Kosti's trans CEDF format by path name
+       $setname = $setfile;
+       $setname =~ s/([^.\[\]]*)\..*/$1/;
+       while (<$setf>) {
+           if (/^<code_set_name>/) {
+               # handle ISO 15897 (POSIX registry) charset mapping format
+               undef $comment_char;
+               undef $escape_char;
+               while (<$setf>) {
+                   if ($comment_char && /^$comment_char/) {
+                       # remove comments
+                       $_ = $`;
+                   }
+                   next if (/^\032?\s*$/);                                             # skip empty lines
+                   if (/^<comment_char> (\S)$/) {
+                       $comment_char = $1;
+                   } elsif (/^<escape_char> (\S)$/) {
+                       $escape_char = $1;
+                   } elsif (/^(END )?CHARMAP$/) {
+                       #ignore
+                   } elsif (/^<.*>\s*\/x([0-9A-F]{2})\s*<U([0-9A-F]{4,8})>/) {
+                       $used{hex($2)} .= "[${setname}{$1}]" unless $remove;
+                       delete $used{hex($2)} if $remove;
+                   } else {
+                       die("Syntax error in line $. in file '$setfile':\n'$_'\n");
+                   }
+               }
+               next;
+           } elsif (/^STARTFONT /) {
+               # handle X11 BDF file
+               while (<$setf>) {
+                   if (/^ENCODING\s+([0-9]+)/) {
+                       $used{$1} .= "[${setname}]" unless $remove;
+                       delete $used{$1} if $remove;
+                   }
+               }
+               next;
+           }
+           tr/a-z/A-Z/;           # make input uppercase
+           if ($cedf) {
+               if ($. > 4) {
+                   if (/^([0-9A-F]{2})\t.?\t(.*)$/) {
+                       # handle Kosta Kosti's trans CEDF format
+                       next if (hex($1) < 32 || (hex($1) > 0x7e && hex($1) < 0xa0));
+                       $ucs = $invname{$2};
+                       die "unknown ISO 10646 name '$2' in '$setfile' line $..\n" if ! $ucs;
+                       $used{$ucs} .= "[${setname}{$1}]" unless $remove;
+                       delete $used{$ucs} if $remove;
+                   } else {
+                       die("Syntax error in line $. in CEDF file '$setfile':\n'$_'\n");
+                   }
+               }
+               next;
+           }
+           if (/^\s*(0X|U\+|U-)?([0-9A-F]{2})\s+\#\s*UNDEFINED\s*$/) {
+               # ignore ftp.unicode.org mapping file lines with #UNDEFINED
+               next;
+           }
+           s/^([^\#]*)\#.*$/$1/;  # remove comments
+           next if (/^\032?\s*$/);     # skip empty lines
+           if (/^\s*(0X)?([0-9A-F-]{2})\s+(0X|U\+|U-)?([0-9A-F]{4,8})\s*$/) {
+               # handle entry from a ftp.unicode.org mapping file
+               $used{hex($4)} .= "[${setname}{$2}]" unless $remove;
+               delete $used{hex($4)} if $remove;
+           } elsif (/^\s*(0X|U\+|U-)?([0-9A-F]{4,8})(\s*-\s*|\s*\.\.\s*|\s+)(0X|U\+|U-)?([0-9A-F]{4,8})\s*$/) {
+               # handle interval specification
+               $first = hex($2);
+               $last = hex($5);
+               for ($i = $first; $i <= $last; $i++) {
+                   $used{$i} .= "[${setname}]" unless $remove;
+                   delete $used{$i} if $remove;
+               }
+           } elsif (/^\s*([0-9A-F]{2,6})(\s+[0-9A-F]{2},?|\s+[0-9A-F]{2}-[0-9A-F]{2},?)+/) {
+               # handle lines from P10 MES draft
+               $row = $1;
+               $cols = $_;
+               $cols =~ s/^\s*([0-9A-F]{2,6})\s*(.*)\s*$/$2/;
+               $cols =~ tr/,//d;
+               @cols = split(/\s+/, $cols);
+               for (@cols) {
+                   if (/^(..)$/) {
+                       $first = hex("$row$1");
+                       $last  = $first;
+                   } elsif (/^(..)-(..)$/) {
+                       $first = hex("$row$1");
+                       $last  = hex("$row$2");
+                   } else {
+                       die ("this should never happen '$_'");
+                   }
+                   for ($i = $first; $i <= $last; $i++) {
+                       $used{$i} .= "[${setname}]" unless $remove;
+                       delete $used{$i} if $remove;
+                   }
+               }
+           } elsif (/^\s*(0X|U\+|U-)?([0-9A-F]{4,8})\s*/) {
+               # handle single character
+               $used{hex($2)} .= "[${setname}]" unless $remove;
+               delete $used{hex($2)} if $remove;
+           } else {
+               die("Syntax error in line $. in file '$setfile':\n'$_'\n") unless /^\s*(\#.*)?$/;
+           }
+       }
+       close $setf;
+    } elsif (/^loadimages$/ || /^loadbigimages$/) {
+       if (/^loadimages$/) {
+           $prefix = "Small.Glyphs";
+       } else {
+           $prefix = "Glyphs";
+       }
+       $total = 0;
+       for $i (keys(%used)) {
+           next if ($name{$i} eq "<control>");
+           $total++;
+       }
+       $count = 0;
+       $| = 1;
+       for $i (sort({$a <=> $b} keys(%used))) {
+           next if ($name{$i} eq "<control>");
+           $count++;
+           $j = sprintf("%04X", $i);
+           $j =~ /(..)(..)/;
+           $gif = "http://charts.unicode.org/Unicode.charts/$prefix/$1/U$j.gif";
+           print("\r$count/$total: $gif");
+           system("mkdir -p $prefix/$1; cd $prefix/$1; webcopy -u -s $gif &");
+           select(undef, undef, undef, 0.2);
+       }
+       print("\n");
+       exit 0;
+    } elsif (/^giftable/) {
+       # form a table of glyphs (requires pbmtools installed)
+       $count = 0;
+       for $i (keys(%used)) {
+           $count++ unless $name{$i} eq "<control>";
+       }
+       $width = int(sqrt($count/sqrt(2)) + 0.5);
+       $width = $1 if /^giftable([0-9]+)$/;
+       system("rm -f tmp-*.pnm table.pnm~ table.pnm");
+       $col = 0;
+       $row = 0;
+       for $i (sort({$a <=> $b} keys(%used))) {
+           next if ($name{$i} eq "<control>");
+           $j = sprintf("%04X", $i);
+           $j =~ /(..)(..)/;
+           $gif = "Small.Glyphs/$1/U$j.gif";
+           $pnm = sprintf("tmp-%02x.pnm", $col);
+           $fallback = "Small.Glyphs/FF/UFFFD.gif";
+           system("giftopnm $gif >$pnm || { rm $pnm ; giftopnm $fallback >$pnm ; }");
+           if (++$col == $width) {
+               system("pnmcat -lr tmp-*.pnm | cat >tmp-row.pnm");
+               if ($row == 0) {
+                   system("mv tmp-row.pnm table.pnm");
+               } else {
+                   system("mv table.pnm table.pnm~; pnmcat -tb table.pnm~ tmp-row.pnm >table.pnm");
+               }
+               $row++;
+               $col = 0;
+               system("rm -f tmp-*.pnm table.pnm~");
+           }
+       }
+       if ($col > 0) {
+           system("pnmcat -lr tmp-*.pnm | cat >tmp-row.pnm");
+           if ($row == 0) {
+               system("mv tmp-row.pnm table.pnm");
+           } else {
+               system("mv table.pnm table.pnm~; pnmcat -tb -jleft -black table.pnm~ tmp-row.pnm >table.pnm");
+           }
+       }
+       system("rm -f table.gif ; ppmtogif table.pnm > table.gif");
+       system("rm -f tmp-*.pnm table.pnm~ table.pnm");
+    } elsif (/^table$/) {
+       # go through all used names to print full table
+       print "<TABLE border=2>\n" if $html;
+       for $i (sort({$a <=> $b} keys(%used))) {
+           next if ($name{$i} eq "<control>");
+           if ($html) {
+               $sources = $used{$i};
+               $sources =~ s/\]\[/, /g;
+               $sources =~ s/^\[//g;
+               $sources =~ s/\]$//g;
+               $sources =~ s/\{(..)\}/<SUB>$1<\/SUB>/g;
+               $j = sprintf("%04X", $i);
+               $j =~ /(..)(..)/;
+               $gif = "Small.Glyphs/$1/U$j.gif";
+               print "<TR>";
+               print "<TD><img width=32 height=32 src=\"$gif\">" if $image;
+               printf("<TD>&#%d;", $i) if $adducs;
+               print "<TD><SAMP>$j</SAMP><TD><SAMP>" . name($i);
+               print " ($comment{$i})" if $comment{$i};
+               print "</SAMP><TD><SMALL>$sources</SMALL>\n";
+           } else {
+               printf("%04X \# ", $i);
+               print pack("U", $i) . " " if $adducs;
+               print name($i) ."\n";
+           }
+       }
+       print "</TABLE>\n" if $html;
+    } elsif (/^imgblock$/) {
+       $width = 16;
+       $width = $1 if /giftable([0-9]+)/;
+       $col = 0;
+       $subline = "";
+       print "\n<P><TABLE cellspacing=0 cellpadding=0>";
+       for $i (sort({$a <=> $b} keys(%used))) {
+           print "<TR>" if $col == 0;
+           $j = sprintf("%04X", $i);
+           $j =~ /(..)(..)/;
+           $gif = "Small.Glyphs/$1/U$j.gif";
+           $alt = name($i);
+           print "<TD><img width=32 height=32 src=\"$gif\" alt=\"$alt\">";
+           $subline .= "<TD><SMALL><SAMP>$j</SAMP></SMALL>";
+           if (++$col == $width) {
+               print "<TR align=center>$subline";
+               $col = 0;
+               $subline = "";
+           }
+       }
+       print "<TR align=center>$subline" if ($col > 0);
+       print "</TABLE>\n";
+    } elsif (/^sources$/) {
+       # count how many characters are attributed to the various source set combinations
+       print "<P>Number of occurences of source character set combinations:\n<TABLE border=2>" if $html;
+       for $i (keys(%used)) {
+           next if ($name{$i} eq "<control>");
+           $sources = $used{$i};
+           $sources =~ s/\]\[/, /g;
+           $sources =~ s/^\[//g;
+           $sources =~ s/\]$//g;
+           $sources =~ s/\{(..)\}//g;
+           $contribs{$sources} += 1;
+       }
+       for $j (keys(%contribs)) {
+           print "<TR><TD>$contribs{$j}<TD>$j\n" if $html;
+       }
+       print "</TABLE>\n" if $html;
+    } elsif (/^compact$/) {
+       # print compact table in P10 MES format
+       print "<P>Compact representation of this character set:\n<TABLE border=2>" if $html;
+       print "<TR><TD><B>Rows</B><TD><B>Positions (Cells)</B>" if $html;
+       print "\n# Plane 00\n# Rows\tPositions (Cells)\n" unless $html;
+       $current_row = '';
+       $start_col = '';
+       $last_col = '';
+       for $i (sort({$a <=> $b} keys(%used))) {
+           next if ($name{$i} eq "<control>");
+           $row = sprintf("%02X", $i >> 8);
+           $col = sprintf("%02X", $i & 0xff);
+           if ($row ne $current_row) {
+               if (($last_col ne '') and ($last_col ne $start_col)) {
+                   print "-$last_col";
+                   print "</SAMP>" if $html;
+               }
+               print "<TR><TD><SAMP>$row</SAMP><TD><SAMP>" if $html;
+               print "\n  $row\t" unless $html;
+               $len = 0;
+               $current_row = $row;
+               $start_col = '';
+           }
+           if ($start_col eq '') {
+               print "$col";
+               $len += 2;
+               $start_col = $col;
+               $last_col = $col;
+           } elsif (hex($col) == hex($last_col) + 1) {
+               $last_col = $col;
+           } else {
+               if ($last_col ne $start_col) {
+                   print "-$last_col";
+                   $len += 3;
+               }
+               if ($len > 60 && !$html) {
+                   print "\n  $row\t";
+                   $len = 0;
+               };
+               print " " if $len;
+               print "$col";
+               $len += 2 + !! $len;
+               $start_col = $col;
+               $last_col = $col;
+           }
+       }
+       if (($last_col ne '') and ($last_col ne $start_col)) {
+           print "-$last_col";
+           print "</SAMP>" if $html;
+       }
+       print "\n" if ($current_row ne '');
+       print "</TABLE>\n" if $html;
+       print "\n";
+    } elsif (/^c$/) {
+       # print table as C interval array
+       print "{";
+       $last_i = '';
+       $columns = 3;
+       $col = $columns;
+       for $i (sort({$a <=> $b} keys(%used))) {
+           next if ($name{$i} eq "<control>");
+           if ($last_i eq '') {
+               if (++$col > $columns) { $col = 1; print "\n "; }
+               printf(" { 0x%04X, ", $i);
+               $last_i = $i;
+           } elsif ($i == $last_i + 1) {
+               $last_i = $i;
+           } else {
+               printf("0x%04X },", $last_i);
+               if (++$col > $columns) { $col = 1; print "\n "; }
+               printf(" { 0x%04X, ", $i);
+               $last_i = $i;
+           }
+       }
+       if ($last_i ne '') {
+           printf("0x%04X }", $last_i);
+       }
+       print "\n};\n";
+    } elsif (/^utf8-list$/) {
+       $col = 0;
+       $block = 0;
+       $last = -1;
+       for $i (sort({$a <=> $b} keys(%used))) {
+           next if ($name{$i} eq "<control>");
+           while ($blockend[$block] < $i && $block < $blocks - 1) {
+               $block++;
+           }
+           if ($last <= $blockend[$block-1] &&
+               $i < $blockstart[$block]) {
+               print "\n" if ($col);
+               printf "\nFree block (U+%04X-U+%04X):\n\n",
+                   $blockend[$block-1] + 1, $blockstart[$block] - 1;
+               $col = 0;
+           }
+           if ($last < $blockstart[$block] && $i >= $blockstart[$block]) {
+               print "\n" if ($col);
+               printf "\n$blockname[$block] (U+%04X-U+%04X):\n\n",
+               $blockstart[$block], $blockend[$block];
+               $col = 0;
+           }
+           if ($category{$i} eq 'Mn') {
+               # prefix non-spacing character with U+25CC DOTTED CIRCLE
+               print "\x{25CC}";
+           } elsif ($category{$i} eq 'Me') {
+               # prefix enclosing non-spacing character with space
+               print " ";
+           }
+           print pack("U", $i);
+           $col += 1 + iswide($i);
+           if ($col >= 64) {
+               print "\n";
+               $col = 0;
+           }
+           $last = $i;
+       }
+       print "\n" if ($col);
+    } elsif (/^collections$/) {
+       $block = 0;
+       $last = -1;
+       for $i (sort({$a <=> $b} keys(%used))) {
+           next if ($name{$i} eq "<control>");
+           while ($blockend[$block] < $i && $block < $blocks - 1) {
+               $block++;
+           }
+           if ($last < $blockstart[$block] && $i >= $blockstart[$block]) {
+               print $blockname[$block],
+                 " " x (40 - length($blockname[$block]));
+               printf "%04X-%04X\n",
+                 $blockstart[$block], $blockend[$block];
+           }
+           $last = $i;
+       }
+    } elsif (/^nr$/) {
+       print "<P>" if $html;
+       print "# " unless $html;
+       print "Number of characters in above table: ";
+       $count = 0;
+       for $i (keys(%used)) {
+           $count++ unless $name{$i} eq "<control>";
+       }
+       print $count;
+       print "\n";
+    } elsif (/^clean$/) {
+       # remove characters from set that are not in $unicodedata
+       for $i (keys(%used)) {
+           delete $used{$i} unless is_unicode($i);
+       }
+    } elsif (/^unknown$/) {
+       # remove characters from set that are in $unicodedata
+       for $i (keys(%used)) {
+           delete $used{$i} if is_unicode($i);
+       }
+    } else {
+       die("Unknown command line command '$_'");
+    };
+}
author	Thomas Wolff <towo@towo.net>
	Wed, 7 Mar 2018 22:55:52 +0000 (23:55 +0100)
committer	Corinna Vinschen <corinna@vinschen.de>
	Mon, 12 Mar 2018 09:17:20 +0000 (10:17 +0100)
newlib/libc/string/WIDTH-A	[new file with mode: 0644]	patch \| blob
newlib/libc/string/mkunidata	[new file with mode: 0755]	patch \| blob
newlib/libc/string/mkwide	[new file with mode: 0755]	patch \| blob
newlib/libc/string/mkwidthA	[new file with mode: 0755]	patch \| blob
newlib/libc/string/uniset	[new file with mode: 0755]	patch \| blob