This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
[PATCH] fix fall-out from Jakub's patches
- From: Paolo Bonzini <paolo dot bonzini at polimi dot it>
- To: libc-alpha at sources dot redhat dot com
- Date: Mon, 15 Dec 2003 11:22:58 +0100
- Subject: [PATCH] fix fall-out from Jakub's patches
- Reply-to: bonzini at gnu dot org
This patch makes regex compile again when RE_ENABLE_I18N is not defined.
WRT the optional subexpressions, the second patch fixed the segmentation
violation caused by the first alone; I had simply forgotten a hunk.
However, running the glibc testsuite exposed further cases which need a more
complicated approach.
In particular, consider a(b|((c)*))?d. With my approach \1 and \3 are
set correctly because they are repeated, but \2 is set incorrectly because it
is not inside a repeated subexpression. Avoiding all empty matches
inside a repeated subexpression is blatantly wrong because of (Z())*.
The right fix probably involves fiddling with the epsilon closure.
I have now embedded the glibc regex testsuite into the sed testsuite,
which is going to help me in making my changes.
Thanks for accepting the patches,
Paolo
Index: regcomp.c
===================================================================
RCS file: /cvs/glibc/libc/posix/regcomp.c,v
retrieving revision 1.67
diff -u -p -r1.67 regcomp.c
--- regcomp.c 29 Nov 2003 03:36:20 -0000 1.67
+++ regcomp.c 15 Dec 2003 09:38:10 -0000
@@ -406,8 +409,11 @@ re_compile_fastmap_iter (bufp, init_stat
}
}
#endif /* RE_ENABLE_I18N */
- else if (type == END_OF_RE || type == OP_PERIOD
- || type == OP_UTF8_PERIOD)
+ else if (type == OP_PERIOD
+#ifdef RE_ENABLE_I18N
+ || type == OP_UTF8_PERIOD
+#endif /* RE_ENABLE_I18N */
+ || type == END_OF_RE)
{
memset (fastmap, '\1', sizeof (char) * SBC_MAX);
if (type == END_OF_RE)
Index: regex_internal.c
===================================================================
RCS file: /cvs/glibc/libc/posix/regex_internal.c,v
retrieving revision 1.32
diff -u -p -r1.32 regex_internal.c
--- regex_internal.c 15 Dec 2003 00:45:07 -0000 1.32
+++ regex_internal.c 15 Dec 2003 09:38:11 -0000
@@ -786,7 +786,7 @@ re_string_peek_byte_case (const re_strin
static unsigned char
re_string_fetch_byte_case (re_string_t *pstr)
{
- int ch, off;
+ int ch;
if (BE (!pstr->icase, 1))
return re_string_fetch_byte (pstr);
@@ -794,6 +794,8 @@ re_string_fetch_byte_case (re_string_t *
#ifdef RE_ENABLE_I18N
if (pstr->offsets_needed)
{
+ int off;
+
/* For tr_TR.UTF-8 [[:islower:]] there is
[[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
in that case the whole multi-byte character and return
Index: regexec.c
===================================================================
RCS file: /cvs/glibc/libc/posix/regexec.c,v
retrieving revision 1.42
diff -u -p -r1.42 regexec.c
--- regexec.c 14 Dec 2003 23:40:44 -0000 1.42
+++ regexec.c 15 Dec 2003 09:38:11 -0000
@@ -807,6 +807,7 @@ re_search_internal (preg, string, length
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
if (pmatch[reg_idx].rm_so != -1)
{
+#ifdef RE_ENABLE_I18N
if (BE (input.offsets_needed != 0, 0))
{
if (pmatch[reg_idx].rm_so == input.valid_len)
@@ -818,6 +819,9 @@ re_search_internal (preg, string, length
else
pmatch[reg_idx].rm_eo = input.offsets[pmatch[reg_idx].rm_eo];
}
+#else
+ assert (input.offsets_needed == 0);
+#endif
pmatch[reg_idx].rm_so += match_first;
pmatch[reg_idx].rm_eo += match_first;
}
@@ -3365,6 +3401,7 @@ group_nodes_into_DFAstates (preg, state,
if (preg->syntax & RE_DOT_NOT_NULL)
bitset_clear (accepts, '\0');
}
+#ifdef RE_ENABLE_I18N
else if (type == OP_UTF8_PERIOD)
{
memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
@@ -3373,6 +3410,7 @@ group_nodes_into_DFAstates (preg, state,
if (preg->syntax & RE_DOT_NOT_NULL)
bitset_clear (accepts, '\0');
}
+#endif
else
continue;
@@ -3820,10 +3858,12 @@ check_node_accept (preg, node, mctx, idx
return node->opr.c == ch;
case SIMPLE_BRACKET:
return bitset_contain (node->opr.sbcset, ch);
+#ifdef RE_ENABLE_I18N
case OP_UTF8_PERIOD:
if (ch >= 0x80)
return 0;
/* FALLTHROUGH */
+#endif
case OP_PERIOD:
return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
|| (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));