diff --git a/locale/coll-lookup.c b/locale/coll-lookup.c index dd2a1d7..ef215f0 100644 --- a/locale/coll-lookup.c +++ b/locale/coll-lookup.c @@ -78,3 +78,5 @@ __collseq_table_lookup (const char *table, uint32_t wc) } return ~((uint32_t) 0); } + +#include "weight.c" diff --git a/locale/weight.h b/locale/weight.h index 9eb8ac6..d491a8c 100644 --- a/locale/weight.h +++ b/locale/weight.h @@ -16,117 +16,13 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -/* Find index of weight. */ -auto inline int32_t -__attribute ((always_inline)) -findidx (const unsigned char **cpp, size_t len) -{ - int_fast32_t i = table[*(*cpp)++]; - const unsigned char *cp; - const unsigned char *usrc; - - if (i >= 0) - /* This is an index into the weight table. Cool. */ - return i; - - /* Oh well, more than one sequence starting with this byte. - Search for the correct one. */ - cp = &extra[-i]; - usrc = *cpp; - --len; - while (1) - { - size_t nhere; - - /* The first thing is the index. */ - i = *((const int32_t *) cp); - cp += sizeof (int32_t); - - /* Next is the length of the byte sequence. These are always - short byte sequences so there is no reason to call any - function (even if they are inlined). */ - nhere = *cp++; - - if (i >= 0) - { - /* It is a single character. If it matches we found our - index. Note that at the end of each list there is an - entry of length zero which represents the single byte - sequence. The first (and here only) byte was tested - already. */ - size_t cnt; - - for (cnt = 0; cnt < nhere && cnt < len; ++cnt) - if (cp[cnt] != usrc[cnt]) - break; - - if (cnt == nhere) - { - /* Found it. */ - *cpp += nhere; - return i; - } - - /* Up to the next entry. */ - cp += nhere; - if (!LOCFILE_ALIGNED_P (1 + nhere)) - cp += LOCFILE_ALIGN - (1 + nhere) % LOCFILE_ALIGN; - } - else - { - /* This is a range of characters. First decide whether the - current byte sequence lies in the range. 
*/ - size_t cnt; - size_t offset = 0; - - for (cnt = 0; cnt < nhere && cnt < len; ++cnt) - if (cp[cnt] != usrc[cnt]) - break; - - if (cnt != nhere) - { - if (cnt == len || cp[cnt] > usrc[cnt]) - { - /* Cannot be in this range. */ - cp += 2 * nhere; - if (!LOCFILE_ALIGNED_P (1 + 2 * nhere)) - cp += (LOCFILE_ALIGN - - (1 + 2 * nhere) % LOCFILE_ALIGN); - continue; - } - - /* Test against the end of the range. */ - for (cnt = 0; cnt < nhere; ++cnt) - if (cp[nhere + cnt] != usrc[cnt]) - break; - - if (cnt != nhere && cp[nhere + cnt] < usrc[cnt]) - { - /* Cannot be in this range. */ - cp += 2 * nhere; - if (!LOCFILE_ALIGNED_P (1 + 2 * nhere)) - cp += (LOCFILE_ALIGN - - (1 + 2 * nhere) % LOCFILE_ALIGN); - continue; - } - - /* This range matches the next characters. Now find - the offset in the indirect table. */ - for (cnt = 0; cp[cnt] == usrc[cnt]; ++cnt); - - do - { - offset <<= 8; - offset += usrc[cnt] - cp[cnt]; - } - while (++cnt < nhere); - } - - *cpp += nhere; - return indirect[-i + offset]; - } - } - - /* NOTREACHED */ - return 0x43219876; -} +#include +extern int32_t +findidx (const int32_t *table, const int32_t *indirect, + const unsigned char *extra, + const unsigned char **cpp, size_t len); + +extern int32_t +findidxwc (const int32_t *table, const int32_t *indirect, + const wint_t *extra, + const wint_t **cpp, size_t len); diff --git a/locale/weightwc.h b/locale/weightwc.h index 8f047e3..5702cf9 100644 --- a/locale/weightwc.h +++ b/locale/weightwc.h @@ -16,96 +16,4 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -/* Find index of weight. */ -auto inline int32_t -__attribute ((always_inline)) -findidx (const wint_t **cpp, size_t len) -{ - wint_t ch = *(*cpp)++; - int32_t i = __collidx_table_lookup ((const char *) table, ch); - if (i >= 0) - /* This is an index into the weight table. Cool. */ - return i; - - /* Oh well, more than one sequence starting with this byte. - Search for the correct one. 
*/ - const int32_t *cp = (const int32_t *) &extra[-i]; - --len; - while (1) - { - size_t nhere; - const int32_t *usrc = (const int32_t *) *cpp; - - /* The first thing is the index. */ - i = *cp++; - - /* Next is the length of the byte sequence. These are always - short byte sequences so there is no reason to call any - function (even if they are inlined). */ - nhere = *cp++; - - if (i >= 0) - { - /* It is a single character. If it matches we found our - index. Note that at the end of each list there is an - entry of length zero which represents the single byte - sequence. The first (and here only) byte was tested - already. */ - size_t cnt; - - for (cnt = 0; cnt < nhere && cnt < len; ++cnt) - if (cp[cnt] != usrc[cnt]) - break; - - if (cnt == nhere) - { - /* Found it. */ - *cpp += nhere; - return i; - } - - /* Up to the next entry. */ - cp += nhere; - } - else - { - /* This is a range of characters. First decide whether the - current byte sequence lies in the range. */ - size_t cnt; - size_t offset; - - for (cnt = 0; cnt < nhere - 1 && cnt < len; ++cnt) - if (cp[cnt] != usrc[cnt]) - break; - - if (cnt < nhere - 1) - { - cp += 2 * nhere; - continue; - } - - if (cp[nhere - 1] > usrc[nhere -1]) - { - cp += 2 * nhere; - continue; - } - - if (cp[2 * nhere - 1] < usrc[nhere -1]) - { - cp += 2 * nhere; - continue; - } - - /* This range matches the next characters. Now find - the offset in the indirect table. 
*/ - offset = usrc[nhere - 1] - cp[nhere - 1]; - *cpp += nhere; - - return indirect[-i + offset]; - } - } - - /* NOTREACHED */ - return 0x43219876; -} diff --git a/posix/fnmatch.c b/posix/fnmatch.c index 4f14a8b..133ded0 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -225,6 +225,7 @@ __wcschrnul (s, c) # define MEMPCPY(D, S, N) __mempcpy (D, S, N) # define MEMCHR(S, C, N) memchr (S, C, N) # define STRCOLL(S1, S2) strcoll (S1, S2) +# define FINDIDX findidx # include "fnmatch_loop.c" @@ -318,6 +319,7 @@ is_char_class (const wchar_t *wcs) } # define IS_CHAR_CLASS(string) is_char_class (string) +# define FINDIDX findidxwc # include "fnmatch_loop.c" # endif diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c index f79d051..16d4e6d 100644 --- a/posix/fnmatch_loop.c +++ b/posix/fnmatch_loop.c @@ -385,13 +385,6 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used) int32_t idx; const UCHAR *cp = (const UCHAR *) str; - /* This #include defines a local function! */ -# if WIDE_CHAR_VERSION -# include <locale/weightwc.h> -# else -# include <locale/weight.h> -# endif - # if WIDE_CHAR_VERSION table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); @@ -412,7 +405,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); # endif - idx = findidx (&cp, 1); + idx = FINDIDX (table, indirect, extra, &cp, 1); if (idx != 0) { /* We found a table entry. 
Now see whether the @@ -422,7 +415,8 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used) int32_t idx2; const UCHAR *np = (const UCHAR *) n; - idx2 = findidx (&np, string_end - n); + idx2 = FINDIDX (table, indirect, extra, + &np, string_end - n); if (idx2 != 0 && (idx >> 24) == (idx2 >> 24) && len == weights[idx2 & 0xffffff]) diff --git a/posix/regcomp.c b/posix/regcomp.c index 921d0f4..9d48185 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -3389,6 +3389,9 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, return REG_NOERROR; } +/* This #include defines a static inline function! */ +# include <locale/weight.h> + /* Helper function for parse_bracket_exp. Build the equivalence class which is represented by NAME. The result are written to MBCSET and SBCSET. @@ -3413,8 +3416,6 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) int32_t idx1, idx2; unsigned int ch; size_t len; - /* This #include defines a local function! */ -# include <locale/weight.h> /* Calculate the index for equivalence class. */ cp = name; table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); @@ -3424,7 +3425,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - idx1 = findidx (&cp, -1); + idx1 = findidx (table, indirect, extra, &cp, -1); if (BE (idx1 == 0 || *cp != '\0', 0)) /* This isn't a valid character. 
*/ return REG_ECOLLATE; @@ -3435,7 +3436,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) { char_buf[0] = ch; cp = char_buf; - idx2 = findidx (&cp, 1); + idx2 = findidx (table, indirect, extra, &cp, 1); /* idx2 = table[ch]; */ diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 75c390f..c680c74 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -732,6 +732,8 @@ re_string_wchar_at (const re_string_t *pstr, int idx) return (wint_t) pstr->wcs[idx]; } +#include <locale/weight.h> + # ifndef NOT_IN_libc static int internal_function __attribute__ ((pure, unused)) @@ -740,7 +742,6 @@ re_string_elem_size_at (const re_string_t *pstr, int idx) # ifdef _LIBC const unsigned char *p, *extra; const int32_t *table, *indirect; -# include <locale/weight.h> uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) @@ -751,7 +752,7 @@ re_string_elem_size_at (const re_string_t *pstr, int idx) indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); p = pstr->mbs + idx; - findidx (&p, pstr->len - idx); + findidx (table, indirect, extra, &p, pstr->len - idx); return p - pstr->mbs - idx; } else diff --git a/posix/regexec.c b/posix/regexec.c index 7032da7..72959ad 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -3925,7 +3925,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - int32_t idx = findidx (&cp, elem_len); + int32_t idx = findidx (table, indirect, extra, &cp, elem_len); if (idx > 0) for (i = 0; i < cset->nequiv_classes; ++i) { diff --git a/string/strcoll_l.c b/string/strcoll_l.c index 10ce4a6..fd8f428 100644 --- a/string/strcoll_l.c +++ b/string/strcoll_l.c @@ -29,10 +29,10 @@ #ifndef STRING_TYPE # define STRING_TYPE char # define USTRING_TYPE unsigned char +# define FINDIDX findidx # define STRCOLL __strcoll_l # define STRCMP strcmp # define STRLEN strlen -# define 
WEIGHT_H "../locale/weight.h" # define SUFFIX MB # define L(arg) arg #endif @@ -41,6 +41,7 @@ #define CONCAT1(a,b) a##b #include "../locale/localeinfo.h" +#include "../locale/weight.h" /* Track status while looking for sequences in a string. */ typedef struct @@ -152,7 +153,6 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets, const USTRING_TYPE *weights, const int32_t *table, const USTRING_TYPE *extra, const int32_t *indirect) { -#include WEIGHT_H size_t val = seq->val = 0; int len = seq->len; size_t backw_stop = seq->backw_stop; @@ -194,7 +194,7 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets, while (*us != L('\0')) { - int32_t tmp = findidx (&us, -1); + int32_t tmp = FINDIDX (table, indirect, extra, &us, -1); rulearr[idxmax] = tmp >> 24; idxarr[idxmax] = tmp & 0xffffff; idxcnt = idxmax++; @@ -242,7 +242,6 @@ get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, const USTRING_TYPE *extra, const int32_t *indirect, int pass) { -#include WEIGHT_H size_t val = seq->val = 0; int len = seq->len; size_t backw_stop = seq->backw_stop; @@ -285,7 +284,7 @@ get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, us = seq->back_us; while (i < backw) { - int32_t tmp = findidx (&us, -1); + int32_t tmp = FINDIDX (table, indirect, extra, &us, -1); idx = tmp & 0xffffff; i++; } @@ -300,7 +299,7 @@ get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, while (*us != L('\0')) { - int32_t tmp = findidx (&us, -1); + int32_t tmp = FINDIDX (table, indirect, extra, &us, -1); unsigned char rule = tmp >> 24; prev_idx = idx; idx = tmp & 0xffffff; diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c index 04b9338..6ef90f5 100644 --- a/string/strxfrm_l.c +++ b/string/strxfrm_l.c @@ -28,11 +28,11 @@ #ifndef STRING_TYPE # define STRING_TYPE char # define USTRING_TYPE unsigned char +# define FINDIDX findidx # define STRXFRM __strxfrm_l # define STRCMP strcmp # define STRLEN 
strlen # define STPNCPY __stpncpy -# define WEIGHT_H "../locale/weight.h" # define SUFFIX MB # define L(arg) arg #endif @@ -41,7 +41,7 @@ #define CONCAT1(a,b) a##b #include "../locale/localeinfo.h" - +#include "../locale/weight.h" #ifndef WIDE_CHAR_VERSION @@ -104,8 +104,6 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) size_t idxcnt; int use_malloc; -#include WEIGHT_H - if (nrules == 0) { if (n != 0) @@ -174,7 +172,7 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) idxmax = 0; do { - int32_t tmp = findidx (&usrc, -1); + int32_t tmp = FINDIDX (table, indirect, extra, &usrc, -1); rulearr[idxmax] = tmp >> 24; idxarr[idxmax] = tmp & 0xffffff; diff --git a/wcsmbs/wcscoll_l.c b/wcsmbs/wcscoll_l.c index 74e2e39..10717a8 100644 --- a/wcsmbs/wcscoll_l.c +++ b/wcsmbs/wcscoll_l.c @@ -22,10 +22,10 @@ #define STRING_TYPE wchar_t #define USTRING_TYPE wint_t +#define FINDIDX findidxwc #define STRCOLL __wcscoll_l #define STRCMP wcscmp #define STRLEN __wcslen -#define WEIGHT_H "../locale/weightwc.h" #define SUFFIX WC #define L(arg) L##arg #define WIDE_CHAR_VERSION 1 diff --git a/wcsmbs/wcsxfrm_l.c b/wcsmbs/wcsxfrm_l.c index f3f3f50..780967d 100644 --- a/wcsmbs/wcsxfrm_l.c +++ b/wcsmbs/wcsxfrm_l.c @@ -21,11 +21,11 @@ #define STRING_TYPE wchar_t #define USTRING_TYPE wint_t +#define FINDIDX findidxwc #define STRXFRM __wcsxfrm_l #define STRCMP wcscmp #define STRLEN __wcslen #define STPNCPY __wcpncpy -#define WEIGHT_H "../locale/weightwc.h" #define SUFFIX WC #define L(arg) L##arg #define WIDE_CHAR_VERSION 1