a smaller LC_CTYPE locale format
Bruno Haible
haible@ilog.fr
Fri Jul 28 06:42:00 GMT 2000
One more place must be updated for the new locale format: fnmatch.
The comments there say that fnmatch's speed in 8-bit locales depends on a
fast
iswctype (btowc (c), desc)
operation that does not go through btowc and gconv. Therefore I add a small
(32-byte, i.e. 256-bit: one bit per `unsigned char' value) table for this
in front of each wctype_t table.
2000-07-27 Bruno Haible <haible@clisp.cons.org>
* locale/localeinfo.h (_ISCTYPE): New macro.
* posix/fnmatch_loop.c (internal_fnmatch): Use it, support new
LC_CTYPE locale format.
* locale/programs/ld-ctype.c (locale_ctype_t): New field class_b.
(ctype_output): Output class_b[nr] right before class_3level[nr].
(allocate_arrays): Fill class_b, similarly to class_3level.
*** glibc-20000724/locale/localeinfo.h.bak Mon Jul 3 16:39:31 2000
--- glibc-20000724/locale/localeinfo.h Fri Jul 28 01:00:21 2000
***************
*** 121,126 ****
--- 121,134 ----
};
+ /* LC_CTYPE specific:
+ Access a wide character class with a single character index.
+ _ISCTYPE (c, desc) = iswctype (btowc (c), desc).
+ c must be an `unsigned char'. desc must be a nonzero wctype_t. */
+ #define _ISCTYPE(c, desc) \
+ (((((const uint32_t *) (desc)) - 8)[(c) >> 5] >> ((c) & 0x1f)) & 1)
+
+
/* For each category declare the variable for the current locale data. */
#define DEFINE_CATEGORY(category, category_name, items, a) \
extern struct locale_data *_nl_current_##category;
*** glibc-20000724/posix/fnmatch_loop.c.bak Thu Jul 6 17:36:53 2000
--- glibc-20000724/posix/fnmatch_loop.c Fri Jul 28 00:38:53 2000
***************
*** 256,290 ****
/* Invalid character class name. */
return FNM_NOMATCH;
- /* The following code is glibc specific but does
- there a good job in sppeding up the code since
- we can avoid the btowc() call. The
- IS_CHAR_CLASS call will return a bit mask for
- the 32-bit table. We have to convert it to a
- bitmask for the __ctype_b table. This has to
- be done based on the byteorder as can be seen
- below. In any case we will fall back on the
- code using btowc() if the class is not one of
- the standard classes. */
# if defined _LIBC && ! WIDE_CHAR_VERSION
! # if __BYTE_ORDER == __LITTLE_ENDIAN
! if ((wt & 0xf0ffff) == 0)
{
! wt >>= 16;
! if ((__ctype_b[(UCHAR) *n] & wt) != 0)
! goto matched;
! }
# else
! if (wt <= 0x800)
{
! if ((__ctype_b[(UCHAR) *n] & wt) != 0)
goto matched;
}
! # endif
! else
# endif
- if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
- goto matched;
#else
if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
--- 256,296 ----
/* Invalid character class name. */
return FNM_NOMATCH;
# if defined _LIBC && ! WIDE_CHAR_VERSION
! /* The following code is glibc specific but does
! there a good job in speeding up the code since
! we can avoid the btowc() call. */
! if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
{
! /* Old locale format. */
! # if __BYTE_ORDER == __LITTLE_ENDIAN
! if ((wt & 0xf0ffff) == 0)
! {
! wt >>= 16;
! if ((__ctype_b[(UCHAR) *n] & wt) != 0)
! goto matched;
! }
# else
! if (wt <= 0x800)
! {
! if ((__ctype_b[(UCHAR) *n] & wt) != 0)
! goto matched;
! }
! # endif
! else
! if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
! goto matched;
! }
! else
{
! /* New locale format. */
! if (_ISCTYPE ((UCHAR) *n, wt))
goto matched;
}
! # else
! if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
! goto matched;
# endif
#else
if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
*** glibc-20000724/locale/programs/ld-ctype.c.bak Tue Jul 25 01:53:16 2000
--- glibc-20000724/locale/programs/ld-ctype.c Fri Jul 28 02:18:12 2000
***************
*** 170,175 ****
--- 170,176 ----
uint32_t *names;
uint32_t **map;
uint32_t **map32;
+ uint32_t **class_b;
struct iovec *class_3level;
struct iovec *map_3level;
uint32_t *class_name_ptr;
***************
*** 842,848 ****
+ (oldstyle_tables
? (ctype->map_collection_nr - 2)
: (ctype->nr_charclass + ctype->map_collection_nr)));
! struct iovec iov[2 + nelems + ctype->nr_charclass
+ ctype->map_collection_nr + 2];
struct locale_file data;
uint32_t idx[nelems + 1];
--- 843,849 ----
+ (oldstyle_tables
? (ctype->map_collection_nr - 2)
: (ctype->nr_charclass + ctype->map_collection_nr)));
! struct iovec iov[2 + nelems + 2 * ctype->nr_charclass
+ ctype->map_collection_nr + 2];
struct locale_file data;
uint32_t idx[nelems + 1];
***************
*** 1169,1174 ****
--- 1170,1181 ----
size_t nr = elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE);
if (nr < ctype->nr_charclass)
{
+ iov[2 + elem + offset].iov_base = ctype->class_b[nr];
+ iov[2 + elem + offset].iov_len = 256 / 32
+ * sizeof (uint32_t);
+ idx[elem] += iov[2 + elem + offset].iov_len;
+ ++offset;
+
iov[2 + elem + offset] = ctype->class_3level[nr];
}
else
***************
*** 1182,1188 ****
}
}
! assert (2 + elem + offset == (nelems + ctype->nr_charclass
+ ctype->map_collection_nr + 2 + 2));
write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
--- 1189,1195 ----
}
}
! assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
+ ctype->map_collection_nr + 2 + 2));
write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
***************
*** 4060,4067 ****
xcalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256),
sizeof (char_class32_t));
if (!oldstyle_tables)
! ctype->class_3level = (struct iovec *)
! xmalloc (ctype->nr_charclass * sizeof (struct iovec));
/* This is the array accessed using the multibyte string elements. */
for (idx = 0; idx < 256; ++idx)
--- 4067,4078 ----
xcalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256),
sizeof (char_class32_t));
if (!oldstyle_tables)
! {
! ctype->class_b = (uint32_t **)
! xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
! ctype->class_3level = (struct iovec *)
! xmalloc (ctype->nr_charclass * sizeof (struct iovec));
! }
/* This is the array accessed using the multibyte string elements. */
for (idx = 0; idx < 256; ++idx)
***************
*** 4089,4094 ****
--- 4100,4115 ----
if (!oldstyle_tables)
{
size_t nr;
+
+ for (nr = 0; nr < ctype->nr_charclass; nr++)
+ {
+ ctype->class_b[nr] = (uint32_t *)
+ xcalloc (256 / 32, sizeof (uint32_t));
+
+ for (idx = 0; idx < 256; ++idx)
+ if (ctype->class256_collection[idx] & _ISbit (nr))
+ ctype->class_b[nr][idx >> 5] |= (uint32_t)1 << (idx & 0x1f);
+ }
for (nr = 0; nr < ctype->nr_charclass; nr++)
{
More information about the Libc-alpha mailing list