This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] remove nested functions from regcomp.c
- From: Konstantin Serebryany <konstantin dot s dot serebryany at gmail dot com>
- To: Roland McGrath <roland at hack dot frob dot com>, GNU C Library <libc-alpha at sourceware dot org>
- Date: Mon, 29 Sep 2014 13:54:31 -0700
- Subject: [PATCH] remove nested functions from regcomp.c
- Authentication-results: sourceware.org; auth=none
Hi,
Please review the patch that removes nested functions from posix/regcomp.c.
The patch does not noticeably affect the generated code (same
instructions, a few differences in used registers, offsets, etc)
because all the affected functions have __attribute ((always_inline)).
No regressions in 'make check' on x86_64-linux-gnu (Ubuntu 14.04)
This code has some #ifdef branches for "not _LIBC",
please let me know if any separate testing is required for that case.
This change makes GCC emit 4 new warning that all look like this:
==================
regcomp.c: In function âparse_expressionâ:
regcomp.c:2804:21: warning: âextraâ may be used uninitialized in this
function [-Wmaybe-uninitialized]
name_len == extra[idx]
^
regcomp.c:3055:24: note: âextraâ was declared here
const unsigned char *extra;
==================
The warnings look incorrect, since the code is doing this:
if (nrules)
extra = ....
...
if (nrules)
use(extra)
This feature of GCC is known to have various problems and false
positives, but the warnings
did not fire before because nested functions inhibit this warning.
The warnings can be avoided by initializing all these variables to 0
at declaration.
2014-09-29 Kostya Serebryany <konstantin.s.serebryany@gmail.com>
* posix/regcomp.c
(seek_collating_symbol_entry): New function, broken out of
parse_bracket_exp.
(lookup_collation_sequence_value): Ditto.
(build_range_exp): Ditto.
(build_collating_symbol): Ditto.
(parse_bracket_exp): Remove nested functions. Update call sites.
--kcc
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 897fe27..5d3f2b2 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -2630,7 +2630,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
#define BRACKET_NAME_BUF_SIZE 32
#ifndef _LIBC
- /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
+ /* Static function for parse_bracket_exp only used in case of NOT _LIBC.
Build the range expression which starts from START_ELEM, and ends
at END_ELEM. The result are written to MBCSET and SBCSET.
RANGE_ALLOC is the allocated size of mbcset->range_starts, and
@@ -2762,10 +2762,13 @@ static reg_errcode_t
internal_function
# ifdef RE_ENABLE_I18N
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
- int *coll_sym_alloc, const unsigned char *name)
+ int *coll_sym_alloc, const unsigned char *name,
# else /* not RE_ENABLE_I18N */
-build_collating_symbol (bitset_t sbcset, const unsigned char *name)
+build_collating_symbol (bitset_t sbcset, const unsigned char *name,
# endif /* not RE_ENABLE_I18N */
+ uint32_t nrules,
+ const int32_t *symb_table, int32_t table_size,
+ const unsigned char *extra)
{
size_t name_len = strlen ((const char *) name);
if (BE (name_len != 1, 0))
@@ -2778,257 +2781,279 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
}
#endif /* not _LIBC */
-/* This function parse bracket expression like "[abc]", "[a-c]",
- "[[.a-a.]]" etc. */
+/* Static function for parse_bracket_exp used in _LIBC environement.
+ Seek the collating symbol entry correspondings to NAME.
+ Return the index of the symbol in the SYMB_TABLE,
+ or -1 if not found. */
-static bin_tree_t *
-parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
- reg_syntax_t syntax, reg_errcode_t *err)
+static inline int32_t
+__attribute ((always_inline))
+seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
+ const int32_t *symb_table, int32_t table_size,
+ const unsigned char *extra)
{
-#ifdef _LIBC
- const unsigned char *collseqmb;
- const char *collseqwc;
- uint32_t nrules;
- int32_t table_size;
- const int32_t *symb_table;
- const unsigned char *extra;
-
- /* Local function for parse_bracket_exp used in _LIBC environement.
- Seek the collating symbol entry correspondings to NAME.
- Return the index of the symbol in the SYMB_TABLE,
- or -1 if not found. */
-
- auto inline int32_t
- __attribute ((always_inline))
- seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
- {
- int32_t elem;
+ int32_t elem;
- for (elem = 0; elem < table_size; elem++)
- if (symb_table[2 * elem] != 0)
- {
- int32_t idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- if (/* Compare the length of the name. */
- name_len == extra[idx]
- /* Compare the name. */
- && memcmp (name, &extra[idx + 1], name_len) == 0)
- /* Yep, this is the entry. */
- return elem;
- }
- return -1;
- }
+ for (elem = 0; elem < table_size; elem++)
+ if (symb_table[2 * elem] != 0)
+ {
+ int32_t idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ if (/* Compare the length of the name. */
+ name_len == extra[idx]
+ /* Compare the name. */
+ && memcmp (name, &extra[idx + 1], name_len) == 0)
+ /* Yep, this is the entry. */
+ return elem;
+ }
+ return -1;
+}
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Look up the collation sequence value of BR_ELEM.
- Return the value if succeeded, UINT_MAX otherwise. */
+/* Static function for parse_bracket_exp used in _LIBC environment.
+ Look up the collation sequence value of BR_ELEM.
+ Return the value if succeeded, UINT_MAX otherwise. */
- auto inline unsigned int
- __attribute ((always_inline))
- lookup_collation_sequence_value (bracket_elem_t *br_elem)
+static inline unsigned int
+__attribute ((always_inline))
+lookup_collation_sequence_value (bracket_elem_t *br_elem,
+ uint32_t nrules,
+ const unsigned char *collseqmb,
+ const char *collseqwc,
+ const int32_t *symb_table, int32_t table_size,
+ const unsigned char *extra
+ )
+{
+ if (br_elem->type == SB_CHAR)
{
- if (br_elem->type == SB_CHAR)
- {
- /*
- if (MB_CUR_MAX == 1)
- */
- if (nrules == 0)
- return collseqmb[br_elem->opr.ch];
- else
- {
- wint_t wc = __btowc (br_elem->opr.ch);
- return __collseq_table_lookup (collseqwc, wc);
- }
- }
- else if (br_elem->type == MB_CHAR)
+ /*
+ if (MB_CUR_MAX == 1)
+ */
+ if (nrules == 0)
+ return collseqmb[br_elem->opr.ch];
+ else
{
- if (nrules != 0)
- return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ wint_t wc = __btowc (br_elem->opr.ch);
+ return __collseq_table_lookup (collseqwc, wc);
}
- else if (br_elem->type == COLL_SYM)
+ }
+ else if (br_elem->type == MB_CHAR)
+ {
+ if (nrules != 0)
+ return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ }
+ else if (br_elem->type == COLL_SYM)
+ {
+ size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+ if (nrules != 0)
{
- size_t sym_name_len = strlen ((char *) br_elem->opr.name);
- if (nrules != 0)
+ int32_t elem, idx;
+ elem = seek_collating_symbol_entry (br_elem->opr.name,
+ sym_name_len,
+ symb_table, table_size, extra);
+ if (elem != -1)
{
- int32_t elem, idx;
- elem = seek_collating_symbol_entry (br_elem->opr.name,
- sym_name_len);
- if (elem != -1)
- {
- /* We found the entry. */
- idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- /* Skip the byte sequence of the collating element. */
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~3;
- /* Skip the multibyte collation sequence value. */
- idx += sizeof (unsigned int);
- /* Skip the wide char sequence of the collating element. */
- idx += sizeof (unsigned int) *
- (1 + *(unsigned int *) (extra + idx));
- /* Return the collation sequence value. */
- return *(unsigned int *) (extra + idx);
- }
- else if (sym_name_len == 1)
- {
- /* No valid character. Match it as a single byte
- character. */
- return collseqmb[br_elem->opr.name[0]];
- }
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ /* Skip the byte sequence of the collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+ /* Skip the multibyte collation sequence value. */
+ idx += sizeof (unsigned int);
+ /* Skip the wide char sequence of the collating element. */
+ idx += sizeof (unsigned int) *
+ (1 + *(unsigned int *) (extra + idx));
+ /* Return the collation sequence value. */
+ return *(unsigned int *) (extra + idx);
}
else if (sym_name_len == 1)
- return collseqmb[br_elem->opr.name[0]];
+ {
+ /* No valid character. Match it as a single byte
+ character. */
+ return collseqmb[br_elem->opr.name[0]];
+ }
}
- return UINT_MAX;
+ else if (sym_name_len == 1)
+ return collseqmb[br_elem->opr.name[0]];
}
+ return UINT_MAX;
+}
- /* Local function for parse_bracket_exp used in _LIBC environement.
- Build the range expression which starts from START_ELEM, and ends
- at END_ELEM. The result are written to MBCSET and SBCSET.
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and
- mbcset->range_ends, is a pointer argument sinse we may
- update it. */
+/* Static function for parse_bracket_exp used in _LIBC environement.
+ Build the range expression which starts from START_ELEM, and ends
+ at END_ELEM. The result are written to MBCSET and SBCSET.
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+ mbcset->range_ends, is a pointer argument sinse we may
+ update it. */
- auto inline reg_errcode_t
- __attribute ((always_inline))
- build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
- bracket_elem_t *start_elem, bracket_elem_t *end_elem)
- {
- unsigned int ch;
- uint32_t start_collseq;
- uint32_t end_collseq;
-
- /* Equivalence Classes and Character Classes can't be a range
- start/end. */
- if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
- || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
- 0))
- return REG_ERANGE;
+static inline reg_errcode_t
+__attribute ((always_inline))
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem,
+ reg_syntax_t syntax, re_dfa_t *dfa,
+ uint32_t nrules,
+ const unsigned char *collseqmb,
+ const char *collseqwc,
+ const int32_t *symb_table, int32_t table_size,
+ const unsigned char *extra)
+{
+ unsigned int ch;
+ uint32_t start_collseq;
+ uint32_t end_collseq;
- start_collseq = lookup_collation_sequence_value (start_elem);
- end_collseq = lookup_collation_sequence_value (end_elem);
- /* Check start/end collation sequence values. */
- if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
- return REG_ECOLLATE;
- if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
- return REG_ERANGE;
+ /* Equivalence Classes and Character Classes can't be a range
+ start/end. */
+ if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+ || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+ 0))
+ return REG_ERANGE;
+
+ start_collseq = lookup_collation_sequence_value (start_elem,
+ nrules, collseqmb, collseqwc, symb_table, table_size, extra);
+ end_collseq = lookup_collation_sequence_value (end_elem,
+ nrules, collseqmb, collseqwc, symb_table, table_size, extra);
+ /* Check start/end collation sequence values. */
+ if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+ return REG_ECOLLATE;
+ if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+ return REG_ERANGE;
- /* Got valid collation sequence values, add them as a new entry.
- However, if we have no collation elements, and the character set
- is single byte, the single byte character set that we
- build below suffices. */
- if (nrules > 0 || dfa->mb_cur_max > 1)
+ /* Got valid collation sequence values, add them as a new entry.
+ However, if we have no collation elements, and the character set
+ is single byte, the single byte character set that we
+ build below suffices. */
+ if (nrules > 0 || dfa->mb_cur_max > 1)
+ {
+ /* Check the space of the arrays. */
+ if (BE (*range_alloc == mbcset->nranges, 0))
{
- /* Check the space of the arrays. */
- if (BE (*range_alloc == mbcset->nranges, 0))
- {
- /* There is not enough space, need realloc. */
- uint32_t *new_array_start;
- uint32_t *new_array_end;
- int new_nranges;
-
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- new_array_start = re_realloc (mbcset->range_starts, uint32_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, uint32_t,
- new_nranges);
+ /* There is not enough space, need realloc. */
+ uint32_t *new_array_start;
+ uint32_t *new_array_end;
+ int new_nranges;
- if (BE (new_array_start == NULL || new_array_end == NULL, 0))
- return REG_ESPACE;
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+ new_nranges);
- mbcset->range_starts = new_array_start;
- mbcset->range_ends = new_array_end;
- *range_alloc = new_nranges;
- }
+ if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+ return REG_ESPACE;
- mbcset->range_starts[mbcset->nranges] = start_collseq;
- mbcset->range_ends[mbcset->nranges++] = end_collseq;
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
}
- /* Build the table for single byte characters. */
- for (ch = 0; ch < SBC_MAX; ch++)
- {
- uint32_t ch_collseq;
- /*
- if (MB_CUR_MAX == 1)
- */
- if (nrules == 0)
- ch_collseq = collseqmb[ch];
- else
- ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
- if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
- bitset_set (sbcset, ch);
- }
- return REG_NOERROR;
+ mbcset->range_starts[mbcset->nranges] = start_collseq;
+ mbcset->range_ends[mbcset->nranges++] = end_collseq;
}
- /* Local function for parse_bracket_exp used in _LIBC environement.
- Build the collating element which is represented by NAME.
- The result are written to MBCSET and SBCSET.
- COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
- pointer argument sinse we may update it. */
+ /* Build the table for single byte characters. */
+ for (ch = 0; ch < SBC_MAX; ch++)
+ {
+ uint32_t ch_collseq;
+ /*
+ if (MB_CUR_MAX == 1)
+ */
+ if (nrules == 0)
+ ch_collseq = collseqmb[ch];
+ else
+ ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+ if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+ bitset_set (sbcset, ch);
+ }
+ return REG_NOERROR;
+}
+
+/* Static function for parse_bracket_exp used in _LIBC environement.
+ Build the collating element which is represented by NAME.
+ The result are written to MBCSET and SBCSET.
+ COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+ pointer argument sinse we may update it. */
- auto inline reg_errcode_t
- __attribute ((always_inline))
- build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
- int *coll_sym_alloc, const unsigned char *name)
+static inline reg_errcode_t
+__attribute ((always_inline))
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+ int *coll_sym_alloc, const unsigned char *name,
+ uint32_t nrules,
+ const int32_t *symb_table, int32_t table_size,
+ const unsigned char *extra)
+{
+ int32_t elem, idx;
+ size_t name_len = strlen ((const char *) name);
+ if (nrules != 0)
{
- int32_t elem, idx;
- size_t name_len = strlen ((const char *) name);
- if (nrules != 0)
+ elem = seek_collating_symbol_entry (name, name_len,
+ symb_table, table_size, extra);
+ if (elem != -1)
{
- elem = seek_collating_symbol_entry (name, name_len);
- if (elem != -1)
- {
- /* We found the entry. */
- idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- }
- else if (name_len == 1)
- {
- /* No valid character, treat it as a normal
- character. */
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
- else
- return REG_ECOLLATE;
-
- /* Got valid collation sequence, add it as a new entry. */
- /* Check the space of the arrays. */
- if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
- {
- /* Not enough, realloc it. */
- /* +1 in case of mbcset->ncoll_syms is 0. */
- int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
- /* Use realloc since mbcset->coll_syms is NULL
- if *alloc == 0. */
- int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
- new_coll_sym_alloc);
- if (BE (new_coll_syms == NULL, 0))
- return REG_ESPACE;
- mbcset->coll_syms = new_coll_syms;
- *coll_sym_alloc = new_coll_sym_alloc;
- }
- mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ }
+ else if (name_len == 1)
+ {
+ /* No valid character, treat it as a normal
+ character. */
+ bitset_set (sbcset, name[0]);
return REG_NOERROR;
}
else
+ return REG_ECOLLATE;
+
+ /* Got valid collation sequence, add it as a new entry. */
+ /* Check the space of the arrays. */
+ if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
{
- if (BE (name_len != 1, 0))
- return REG_ECOLLATE;
- else
- {
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->ncoll_syms is 0. */
+ int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+ /* Use realloc since mbcset->coll_syms is NULL
+ if *alloc == 0. */
+ int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+ new_coll_sym_alloc);
+ if (BE (new_coll_syms == NULL, 0))
+ return REG_ESPACE;
+ mbcset->coll_syms = new_coll_syms;
+ *coll_sym_alloc = new_coll_sym_alloc;
+ }
+ mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+ return REG_NOERROR;
+ }
+ else
+ {
+ if (BE (name_len != 1, 0))
+ return REG_ECOLLATE;
+ else
+ {
+ bitset_set (sbcset, name[0]);
+ return REG_NOERROR;
}
}
+}
+
+/* This function parse bracket expression like "[abc]", "[a-c]",
+ "[[.a-a.]]" etc. */
+
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+ reg_syntax_t syntax, reg_errcode_t *err)
+{
+#ifdef _LIBC
+ const unsigned char *collseqmb;
+ const char *collseqwc;
+ uint32_t nrules;
+ int32_t table_size;
+ const int32_t *symb_table;
+ const unsigned char *extra;
+
#endif
re_token_t br_token;
@@ -3169,14 +3194,20 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
#ifdef _LIBC
*err = build_range_exp (sbcset, mbcset, &range_alloc,
- &start_elem, &end_elem);
+ &start_elem, &end_elem,
+ syntax, dfa, nrules, collseqmb, collseqwc,
+ symb_table, table_size, extra);
#else
# ifdef RE_ENABLE_I18N
*err = build_range_exp (sbcset,
dfa->mb_cur_max > 1 ? mbcset : NULL,
- &range_alloc, &start_elem, &end_elem);
+ &range_alloc, &start_elem, &end_elem,
+ syntax, dfa, nrules, collseqmb, collseqwc,
+ symb_table, table_size, extra);
# else
- *err = build_range_exp (sbcset, &start_elem, &end_elem);
+ *err = build_range_exp (sbcset, &start_elem, &end_elem,
+ syntax, dfa, nrules, collseqmb, collseqwc,
+ symb_table, table_size, extra);
# endif
#endif /* RE_ENABLE_I18N */
if (BE (*err != REG_NOERROR, 0))
@@ -3222,7 +3253,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
#ifdef RE_ENABLE_I18N
mbcset, &coll_sym_alloc,
#endif /* RE_ENABLE_I18N */
- start_elem.opr.name);
+ start_elem.opr.name,
+ nrules,
+ symb_table, table_size, extra);
if (BE (*err != REG_NOERROR, 0))
goto parse_bracket_exp_free_return;
break;