This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix wrong implementation of \B (BZ #693)


This is pretty easy to do, as all the code is already there to implement
\b.  Just like \b is lowered to \<|\>, \B is lowered to the disjunction
of two (otherwise unavailable) constraints, "inside word" and "outside
word".

The patch is on top of the BZ #605 and BZ #611 patches (you were waiting
for the BZ #605 changelog, but it can be found in the Bugzilla audit
trail, as well as in the ping I sent you by private mail :-).

Once my patch queue is over (apart from these, there are just a couple
more), I'll backport this to 2.3 unless someone beats me to it.

Paolo




2005-01-26  Paolo Bonzini  <bonzini@gnu.org>

	* posix/regcomp.c (peek_token): Fix ctx_type for \B.
	(parse_expression): Lower token->opr.ctx_type == NOT_WORD_DELIM.
	* posix/regex_internal.h (re_context_type): Add NOT_WORD_DELIM
	and OUTSIDE_WORD.
	* posix/PCRE.tests: Adjust \B tests to check if it matches outside
	a word.


--- orig/posix/regcomp.c
+++ mod/posix/regcomp.c
@@ -1864,7 +1864,7 @@ peek_token (token, input, syntax)
 	  if (!(syntax & RE_NO_GNU_OPS))
 	    {
 	      token->type = ANCHOR;
-	      token->opr.ctx_type = INSIDE_WORD;
+	      token->opr.ctx_type = NOT_WORD_DELIM;
 	    }
 	  break;
 	case 'w':
@@ -2352,15 +2352,16 @@ parse_expression (regexp, preg, token, s
       break;
     case ANCHOR:
       if ((token->opr.ctx_type
-	   & (WORD_DELIM | INSIDE_WORD | WORD_FIRST | WORD_LAST))
+	   & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
 	  && dfa->word_ops_used == 0)
 	init_word_char (dfa);
-      if (token->opr.ctx_type == WORD_DELIM)
+      if (token->opr.ctx_type >= DUMMY_CONSTRAINT)
 	{
+	  int delim = (token->opr.ctx_type == WORD_DELIM);
 	  bin_tree_t *tree_first, *tree_last;
-	  token->opr.ctx_type = WORD_FIRST;
+	  token->opr.ctx_type = delim ? WORD_FIRST : INSIDE_WORD;
 	  tree_first = create_token_tree (dfa, NULL, NULL, token);
-	  token->opr.ctx_type = WORD_LAST;
+	  token->opr.ctx_type = delim ? WORD_LAST : OUTSIDE_WORD;
 	  tree_last = create_token_tree (dfa, NULL, NULL, token);
 	  tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
 	  if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))


--- orig/posix/regex_internal.h
+++ mod/posix/regex_internal.h
@@ -148,13 +148,15 @@ static inline void bitset_mask (bitset d
 typedef enum
 {
   INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  OUTSIDE_WORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
   WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
   WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
   LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
   LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
   BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
   BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
-  WORD_DELIM = DUMMY_CONSTRAINT
+  WORD_DELIM = DUMMY_CONSTRAINT,
+  NOT_WORD_DELIM = DUMMY_CONSTRAINT << 1,
 } re_context_type;
 
 typedef struct


--- orig/posix/PCRE.tests
+++ mod/posix/PCRE.tests
@@ -1420,17 +1420,23 @@ No match
     -a-
 No match
 
-/\By\b/
+/\B.\b/
     xy
  0: y
+    x.!?y
+ 0: ?
 
-/\by\B/
+/\b.\B/
     yz
  0: y
+    x.!?y
+ 0: .
 
-/\By\B/
+/\B.\B/
     xyz
  0: y
+    x.!?y
+ 0: !
 
 /\w/
     a








Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]