]> sourceware.org Git - glibc.git/blame - posix/regex_internal.c
Update.
[glibc.git] / posix / regex_internal.c
CommitLineData
3b0bdc72 1/* Extended regular expression matching and search library.
54e1cabc 2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3b0bdc72
UD
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
92b27c74 21static void re_string_construct_common (const char *str, int len,
1b2c2628
UD
22 re_string_t *pstr,
23 RE_TRANSLATE_TYPE trans, int icase);
434d3784 24#ifdef RE_ENABLE_I18N
1843975c
RM
25static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
26 wint_t *last_wc);
434d3784 27#endif /* RE_ENABLE_I18N */
3b0bdc72 28static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
1b2c2628
UD
29 const re_node_set *nodes,
30 unsigned int hash);
a9388965 31static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
1b2c2628 32 unsigned int hash);
3b0bdc72 33static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
1b2c2628
UD
34 const re_node_set *nodes,
35 unsigned int hash);
3b0bdc72 36static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
1b2c2628
UD
37 const re_node_set *nodes,
38 unsigned int context,
39 unsigned int hash);
3b0bdc72 40static unsigned int inline calc_state_hash (const re_node_set *nodes,
1b2c2628 41 unsigned int context);
3b0bdc72
UD
42\f
43/* Functions for string operation. */
44
612546c6
UD
45/* This function allocate the buffers. It is necessary to call
46 re_string_reconstruct before using the object. */
47
3b0bdc72 48static reg_errcode_t
612546c6 49re_string_allocate (pstr, str, len, init_len, trans, icase)
3b0bdc72 50 re_string_t *pstr;
92b27c74 51 const char *str;
612546c6 52 int len, init_len, icase;
3b0bdc72
UD
53 RE_TRANSLATE_TYPE trans;
54{
55 reg_errcode_t ret;
612546c6
UD
56 int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
57 re_string_construct_common (str, len, pstr, trans, icase);
ac3d553b 58 pstr->stop = pstr->len;
612546c6
UD
59
60 ret = re_string_realloc_buffers (pstr, init_buf_len);
61 if (BE (ret != REG_NOERROR, 0))
62 return ret;
63
c202c2c5 64 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
1b2c2628 65 : (unsigned char *) str);
612546c6
UD
66 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
67 pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
1b2c2628 68 || MB_CUR_MAX > 1) ? pstr->valid_len : len;
612546c6
UD
69 return REG_NOERROR;
70}
71
72/* This function allocate the buffers, and initialize them. */
73
74static reg_errcode_t
75re_string_construct (pstr, str, len, trans, icase)
76 re_string_t *pstr;
92b27c74 77 const char *str;
612546c6
UD
78 int len, icase;
79 RE_TRANSLATE_TYPE trans;
80{
81 reg_errcode_t ret;
82 re_string_construct_common (str, len, pstr, trans, icase);
ac3d553b 83 pstr->stop = pstr->len;
612546c6
UD
84 /* Set 0 so that this function can initialize whole buffers. */
85 pstr->valid_len = 0;
86
87 if (len > 0)
3b0bdc72 88 {
612546c6 89 ret = re_string_realloc_buffers (pstr, len + 1);
bc15410e 90 if (BE (ret != REG_NOERROR, 0))
1b2c2628 91 return ret;
3b0bdc72 92 }
c202c2c5 93 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
1b2c2628 94 : (unsigned char *) str);
612546c6
UD
95 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
96
97 if (icase)
98 {
99#ifdef RE_ENABLE_I18N
100 if (MB_CUR_MAX > 1)
1b2c2628 101 build_wcs_upper_buffer (pstr);
612546c6 102 else
3b0bdc72 103#endif /* RE_ENABLE_I18N */
1b2c2628 104 build_upper_buffer (pstr);
612546c6
UD
105 }
106 else
3b0bdc72 107 {
612546c6
UD
108#ifdef RE_ENABLE_I18N
109 if (MB_CUR_MAX > 1)
1b2c2628 110 build_wcs_buffer (pstr);
612546c6
UD
111 else
112#endif /* RE_ENABLE_I18N */
1b2c2628
UD
113 {
114 if (trans != NULL)
115 re_string_translate_buffer (pstr);
116 else
117 pstr->valid_len = len;
118 }
3b0bdc72 119 }
612546c6
UD
120
121 /* Initialized whole buffers, then valid_len == bufs_len. */
122 pstr->valid_len = pstr->bufs_len;
3b0bdc72
UD
123 return REG_NOERROR;
124}
125
612546c6 126/* Helper functions for re_string_allocate, and re_string_construct. */
3b0bdc72
UD
127
128static reg_errcode_t
612546c6 129re_string_realloc_buffers (pstr, new_buf_len)
3b0bdc72 130 re_string_t *pstr;
612546c6 131 int new_buf_len;
3b0bdc72 132{
3b0bdc72 133#ifdef RE_ENABLE_I18N
612546c6 134 if (MB_CUR_MAX > 1)
3b0bdc72 135 {
1b2c2628
UD
136 wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
137 if (BE (new_array == NULL, 0))
138 return REG_ESPACE;
139 pstr->wcs = new_array;
3b0bdc72 140 }
3b0bdc72 141#endif /* RE_ENABLE_I18N */
612546c6 142 if (MBS_ALLOCATED (pstr))
3b0bdc72 143 {
1b2c2628
UD
144 unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
145 new_buf_len);
146 if (BE (new_array == NULL, 0))
147 return REG_ESPACE;
148 pstr->mbs = new_array;
3b0bdc72 149 }
612546c6 150 if (MBS_CASE_ALLOCATED (pstr))
3b0bdc72 151 {
1b2c2628
UD
152 unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
153 new_buf_len);
154 if (BE (new_array == NULL, 0))
155 return REG_ESPACE;
156 pstr->mbs_case = new_array;
612546c6 157 if (!MBS_ALLOCATED (pstr))
1b2c2628 158 pstr->mbs = pstr->mbs_case;
3b0bdc72 159 }
612546c6 160 pstr->bufs_len = new_buf_len;
3b0bdc72
UD
161 return REG_NOERROR;
162}
163
612546c6 164
3b0bdc72 165static void
612546c6 166re_string_construct_common (str, len, pstr, trans, icase)
92b27c74 167 const char *str;
3b0bdc72
UD
168 int len;
169 re_string_t *pstr;
612546c6
UD
170 RE_TRANSLATE_TYPE trans;
171 int icase;
3b0bdc72 172{
612546c6 173 memset (pstr, '\0', sizeof (re_string_t));
c202c2c5 174 pstr->raw_mbs = (const unsigned char *) str;
3b0bdc72 175 pstr->len = len;
612546c6
UD
176 pstr->trans = trans;
177 pstr->icase = icase ? 1 : 0;
3b0bdc72
UD
178}
179
180#ifdef RE_ENABLE_I18N
181
612546c6 182/* Build wide character buffer PSTR->WCS.
3b0bdc72
UD
183 If the byte sequence of the string are:
184 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
185 Then wide character buffer will be:
186 <wc1> , WEOF , <wc2> , WEOF , <wc3>
187 We use WEOF for padding, they indicate that the position isn't
612546c6 188 a first byte of a multibyte character.
3b0bdc72 189
612546c6
UD
190 Note that this function assumes PSTR->VALID_LEN elements are already
191 built and starts from PSTR->VALID_LEN. */
192
193static void
3b0bdc72
UD
194build_wcs_buffer (pstr)
195 re_string_t *pstr;
196{
612546c6
UD
197 mbstate_t prev_st;
198 int byte_idx, end_idx, mbclen, remain_len;
199 /* Build the buffers from pstr->valid_len to either pstr->len or
200 pstr->bufs_len. */
201 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
202 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
203 {
204 wchar_t wc;
205 remain_len = end_idx - byte_idx;
206 prev_st = pstr->cur_state;
c202c2c5 207 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
1b2c2628 208 + byte_idx), remain_len, &pstr->cur_state);
612546c6 209 if (BE (mbclen == (size_t) -2, 0))
1b2c2628
UD
210 {
211 /* The buffer doesn't have enough space, finish to build. */
212 pstr->cur_state = prev_st;
213 break;
214 }
612546c6 215 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
1b2c2628
UD
216 {
217 /* We treat these cases as a singlebyte character. */
218 mbclen = 1;
219 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
220 pstr->cur_state = prev_st;
221 }
612546c6 222
74e12fbc 223 /* Apply the translation if we need. */
612546c6 224 if (pstr->trans != NULL && mbclen == 1)
1b2c2628
UD
225 {
226 int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
227 pstr->mbs_case[byte_idx] = ch;
228 }
3b0bdc72 229 /* Write wide character and padding. */
612546c6
UD
230 pstr->wcs[byte_idx++] = wc;
231 /* Write paddings. */
232 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
1b2c2628 233 pstr->wcs[byte_idx++] = WEOF;
3b0bdc72 234 }
612546c6 235 pstr->valid_len = byte_idx;
3b0bdc72
UD
236}
237
612546c6
UD
238/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
239 but for REG_ICASE. */
240
241static void
3b0bdc72
UD
242build_wcs_upper_buffer (pstr)
243 re_string_t *pstr;
244{
612546c6
UD
245 mbstate_t prev_st;
246 int byte_idx, end_idx, mbclen, remain_len;
247 /* Build the buffers from pstr->valid_len to either pstr->len or
248 pstr->bufs_len. */
249 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
250 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
251 {
252 wchar_t wc;
253 remain_len = end_idx - byte_idx;
254 prev_st = pstr->cur_state;
c202c2c5 255 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
1b2c2628 256 + byte_idx), remain_len, &pstr->cur_state);
612546c6 257 if (BE (mbclen == (size_t) -2, 0))
1b2c2628
UD
258 {
259 /* The buffer doesn't have enough space, finish to build. */
260 pstr->cur_state = prev_st;
261 break;
262 }
612546c6 263 else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
1b2c2628
UD
264 {
265 /* In case of a singlebyte character. */
266 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
74e12fbc 267 /* Apply the translation if we need. */
1b2c2628
UD
268 if (pstr->trans != NULL && mbclen == 1)
269 {
270 ch = pstr->trans[ch];
271 pstr->mbs_case[byte_idx] = ch;
272 }
74e12fbc 273 pstr->wcs[byte_idx] = iswlower (wc) ? towupper (wc) : wc;
1b2c2628
UD
274 pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
275 if (BE (mbclen == (size_t) -1, 0))
276 pstr->cur_state = prev_st;
277 }
3b0bdc72 278 else /* mbclen > 1 */
1b2c2628
UD
279 {
280 if (iswlower (wc))
281 wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
282 else
283 memcpy (pstr->mbs + byte_idx,
284 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
74e12fbc 285 pstr->wcs[byte_idx++] = iswlower (wc) ? towupper (wc) : wc;
1b2c2628
UD
286 /* Write paddings. */
287 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
288 pstr->wcs[byte_idx++] = WEOF;
289 }
3b0bdc72 290 }
612546c6
UD
291 pstr->valid_len = byte_idx;
292}
293
294/* Skip characters until the index becomes greater than NEW_RAW_IDX.
295 Return the index. */
296
297static int
1843975c 298re_string_skip_chars (pstr, new_raw_idx, last_wc)
612546c6
UD
299 re_string_t *pstr;
300 int new_raw_idx;
1843975c 301 wint_t *last_wc;
612546c6
UD
302{
303 mbstate_t prev_st;
304 int rawbuf_idx, mbclen;
1843975c 305 wchar_t wc = 0;
612546c6
UD
306
307 /* Skip the characters which are not necessary to check. */
308 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
309 rawbuf_idx < new_raw_idx;)
310 {
1843975c
RM
311 int remain_len;
312 remain_len = pstr->len - rawbuf_idx;
612546c6 313 prev_st = pstr->cur_state;
1843975c
RM
314 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
315 remain_len, &pstr->cur_state);
612546c6 316 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
1b2c2628
UD
317 {
318 /* We treat these cases as a singlebyte character. */
319 mbclen = 1;
320 pstr->cur_state = prev_st;
321 }
612546c6
UD
322 /* Then proceed the next character. */
323 rawbuf_idx += mbclen;
324 }
1843975c 325 *last_wc = (wint_t) wc;
612546c6 326 return rawbuf_idx;
3b0bdc72
UD
327}
328#endif /* RE_ENABLE_I18N */
329
1b2c2628 330/* Build the buffer PSTR->MBS, and apply the translation if we need.
612546c6
UD
331 This function is used in case of REG_ICASE. */
332
333static void
3b0bdc72
UD
334build_upper_buffer (pstr)
335 re_string_t *pstr;
336{
612546c6
UD
337 int char_idx, end_idx;
338 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
3b0bdc72 339
612546c6 340 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
3b0bdc72 341 {
612546c6
UD
342 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
343 if (pstr->trans != NULL)
1b2c2628 344 {
74e12fbc 345 ch = pstr->trans[ch];
1b2c2628
UD
346 pstr->mbs_case[char_idx] = ch;
347 }
612546c6 348 if (islower (ch))
1b2c2628 349 pstr->mbs[char_idx] = toupper (ch);
3b0bdc72 350 else
1b2c2628 351 pstr->mbs[char_idx] = ch;
3b0bdc72 352 }
612546c6 353 pstr->valid_len = char_idx;
3b0bdc72
UD
354}
355
612546c6 356/* Apply TRANS to the buffer in PSTR. */
3b0bdc72 357
612546c6
UD
358static void
359re_string_translate_buffer (pstr)
3b0bdc72 360 re_string_t *pstr;
3b0bdc72 361{
612546c6
UD
362 int buf_idx, end_idx;
363 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
364
365 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
3b0bdc72 366 {
612546c6
UD
367 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
368 pstr->mbs_case[buf_idx] = pstr->trans[ch];
3b0bdc72 369 }
612546c6
UD
370
371 pstr->valid_len = buf_idx;
372}
373
374/* This function re-construct the buffers.
375 Concretely, convert to wide character in case of MB_CUR_MAX > 1,
376 convert to upper case in case of REG_ICASE, apply translation. */
377
378static reg_errcode_t
379re_string_reconstruct (pstr, idx, eflags, newline)
380 re_string_t *pstr;
381 int idx, eflags, newline;
382{
383 int offset = idx - pstr->raw_mbs_idx;
384 if (offset < 0)
385 {
386 /* Reset buffer. */
434d3784
UD
387#ifdef RE_ENABLE_I18N
388 if (MB_CUR_MAX > 1)
1b2c2628 389 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
434d3784 390#endif /* RE_ENABLE_I18N */
dd385d7c
UD
391 pstr->len += pstr->raw_mbs_idx;
392 pstr->stop += pstr->raw_mbs_idx;
612546c6
UD
393 pstr->valid_len = pstr->raw_mbs_idx = 0;
394 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
1b2c2628 395 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
612546c6 396 if (!MBS_CASE_ALLOCATED (pstr))
1b2c2628 397 pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
612546c6 398 if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
1b2c2628 399 pstr->mbs = (unsigned char *) pstr->raw_mbs;
612546c6
UD
400 offset = idx;
401 }
402
403 if (offset != 0)
404 {
612546c6
UD
405 /* Are the characters which are already checked remain? */
406 if (offset < pstr->valid_len)
1b2c2628
UD
407 {
408 /* Yes, move them to the front of the buffer. */
1843975c
RM
409 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
410 newline);
3b0bdc72 411#ifdef RE_ENABLE_I18N
1b2c2628
UD
412 if (MB_CUR_MAX > 1)
413 memmove (pstr->wcs, pstr->wcs + offset,
414 (pstr->valid_len - offset) * sizeof (wint_t));
612546c6 415#endif /* RE_ENABLE_I18N */
1b2c2628
UD
416 if (MBS_ALLOCATED (pstr))
417 memmove (pstr->mbs, pstr->mbs + offset,
418 pstr->valid_len - offset);
419 if (MBS_CASE_ALLOCATED (pstr))
420 memmove (pstr->mbs_case, pstr->mbs_case + offset,
421 pstr->valid_len - offset);
422 pstr->valid_len -= offset;
612546c6 423#if DEBUG
1b2c2628 424 assert (pstr->valid_len > 0);
3b0bdc72 425#endif
1b2c2628 426 }
612546c6 427 else
1b2c2628
UD
428 {
429 /* No, skip all characters until IDX. */
430 pstr->valid_len = 0;
3b0bdc72 431#ifdef RE_ENABLE_I18N
1b2c2628
UD
432 if (MB_CUR_MAX > 1)
433 {
434 int wcs_idx;
1843975c
RM
435 wint_t wc;
436 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
1b2c2628
UD
437 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
438 pstr->wcs[wcs_idx] = WEOF;
1843975c
RM
439 if (pstr->trans && wc <= 0xff)
440 wc = pstr->trans[wc];
441 pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
442 : ((newline && IS_WIDE_NEWLINE (wc))
443 ? CONTEXT_NEWLINE : 0));
1b2c2628 444 }
1843975c 445 else
612546c6 446#endif /* RE_ENABLE_I18N */
1843975c
RM
447 {
448 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
449 if (pstr->trans)
450 c = pstr->trans[c];
451 pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD
452 : ((newline && IS_NEWLINE (c))
453 ? CONTEXT_NEWLINE : 0));
454 }
1b2c2628 455 }
612546c6 456 if (!MBS_CASE_ALLOCATED (pstr))
1b2c2628
UD
457 {
458 pstr->mbs_case += offset;
459 /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
460 if (!MBS_ALLOCATED (pstr))
461 pstr->mbs += offset;
462 }
3b0bdc72 463 }
612546c6
UD
464 pstr->raw_mbs_idx = idx;
465 pstr->len -= offset;
ac3d553b 466 pstr->stop -= offset;
612546c6
UD
467
468 /* Then build the buffers. */
469#ifdef RE_ENABLE_I18N
470 if (MB_CUR_MAX > 1)
3b0bdc72 471 {
612546c6 472 if (pstr->icase)
1b2c2628 473 build_wcs_upper_buffer (pstr);
612546c6 474 else
1b2c2628 475 build_wcs_buffer (pstr);
3b0bdc72
UD
476 }
477 else
612546c6 478#endif /* RE_ENABLE_I18N */
3b0bdc72 479 {
612546c6 480 if (pstr->icase)
1b2c2628 481 build_upper_buffer (pstr);
612546c6 482 else if (pstr->trans != NULL)
1b2c2628 483 re_string_translate_buffer (pstr);
3b0bdc72 484 }
612546c6
UD
485 pstr->cur_idx = 0;
486
3b0bdc72
UD
487 return REG_NOERROR;
488}
489
490static void
491re_string_destruct (pstr)
492 re_string_t *pstr;
493{
494#ifdef RE_ENABLE_I18N
495 re_free (pstr->wcs);
496#endif /* RE_ENABLE_I18N */
612546c6
UD
497 if (MBS_ALLOCATED (pstr))
498 re_free (pstr->mbs);
499 if (MBS_CASE_ALLOCATED (pstr))
500 re_free (pstr->mbs_case);
3b0bdc72
UD
501}
502
503/* Return the context at IDX in INPUT. */
612546c6 504
3b0bdc72
UD
505static unsigned int
506re_string_context_at (input, idx, eflags, newline_anchor)
507 const re_string_t *input;
508 int idx, eflags, newline_anchor;
509{
510 int c;
511 if (idx < 0 || idx == input->len)
512 {
3b0bdc72 513 if (idx < 0)
1b2c2628
UD
514 /* In this case, we use the value stored in input->tip_context,
515 since we can't know the character in input->mbs[-1] here. */
516 return input->tip_context;
bc15410e 517 else /* (idx == input->len) */
1b2c2628
UD
518 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
519 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
3b0bdc72 520 }
a0756231
RM
521#ifdef RE_ENABLE_I18N
522 if (MB_CUR_MAX > 1)
1843975c
RM
523 {
524 wint_t wc;
525 int wc_idx = idx;
526 while(input->wcs[wc_idx] == WEOF)
527 {
528#ifdef DEBUG
529 /* It must not happen. */
530 assert (wc_idx >= 0);
531#endif
532 --wc_idx;
533 if (wc_idx < 0)
534 return input->tip_context;
535 }
536 wc = input->wcs[wc_idx];
537 if (IS_WIDE_WORD_CHAR (wc))
538 return CONTEXT_WORD;
539 return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0;
540 }
a0756231
RM
541 else
542#endif
543 {
544 c = re_string_byte_at (input, idx);
545 if (IS_WORD_CHAR (c))
546 return CONTEXT_WORD;
547 return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
548 }
3b0bdc72
UD
549}
550\f
551/* Functions for set operation. */
552
553static reg_errcode_t
554re_node_set_alloc (set, size)
555 re_node_set *set;
556 int size;
557{
558 set->alloc = size;
559 set->nelem = 0;
560 set->elems = re_malloc (int, size);
bc15410e 561 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
562 return REG_ESPACE;
563 return REG_NOERROR;
564}
565
566static reg_errcode_t
567re_node_set_init_1 (set, elem)
568 re_node_set *set;
569 int elem;
570{
571 set->alloc = 1;
572 set->nelem = 1;
573 set->elems = re_malloc (int, 1);
bc15410e 574 if (BE (set->elems == NULL, 0))
6291ee3c
UD
575 {
576 set->alloc = set->nelem = 0;
577 return REG_ESPACE;
578 }
3b0bdc72
UD
579 set->elems[0] = elem;
580 return REG_NOERROR;
581}
582
583static reg_errcode_t
584re_node_set_init_2 (set, elem1, elem2)
585 re_node_set *set;
586 int elem1, elem2;
587{
588 set->alloc = 2;
589 set->elems = re_malloc (int, 2);
bc15410e 590 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
591 return REG_ESPACE;
592 if (elem1 == elem2)
593 {
594 set->nelem = 1;
595 set->elems[0] = elem1;
596 }
597 else
598 {
599 set->nelem = 2;
600 if (elem1 < elem2)
1b2c2628
UD
601 {
602 set->elems[0] = elem1;
603 set->elems[1] = elem2;
604 }
3b0bdc72 605 else
1b2c2628
UD
606 {
607 set->elems[0] = elem2;
608 set->elems[1] = elem1;
609 }
3b0bdc72
UD
610 }
611 return REG_NOERROR;
612}
613
614static reg_errcode_t
615re_node_set_init_copy (dest, src)
616 re_node_set *dest;
617 const re_node_set *src;
618{
619 dest->nelem = src->nelem;
620 if (src->nelem > 0)
621 {
622 dest->alloc = dest->nelem;
623 dest->elems = re_malloc (int, dest->alloc);
bc15410e 624 if (BE (dest->elems == NULL, 0))
6291ee3c
UD
625 {
626 dest->alloc = dest->nelem = 0;
627 return REG_ESPACE;
628 }
3b0bdc72
UD
629 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
630 }
631 else
632 re_node_set_init_empty (dest);
633 return REG_NOERROR;
634}
635
a9388965
UD
636/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
637 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
638 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
639
3b0bdc72
UD
640static reg_errcode_t
641re_node_set_add_intersect (dest, src1, src2)
642 re_node_set *dest;
643 const re_node_set *src1, *src2;
644{
645 int i1, i2, id;
646 if (src1->nelem > 0 && src2->nelem > 0)
647 {
648 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1b2c2628
UD
649 {
650 dest->alloc = src1->nelem + src2->nelem + dest->nelem;
651 dest->elems = re_realloc (dest->elems, int, dest->alloc);
652 if (BE (dest->elems == NULL, 0))
653 return REG_ESPACE;
654 }
3b0bdc72
UD
655 }
656 else
657 return REG_NOERROR;
658
659 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
660 {
661 if (src1->elems[i1] > src2->elems[i2])
1b2c2628
UD
662 {
663 ++i2;
664 continue;
665 }
3b0bdc72 666 if (src1->elems[i1] == src2->elems[i2])
1b2c2628
UD
667 {
668 while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
669 ++id;
670 if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
671 ++id;
672 else
673 {
674 memmove (dest->elems + id + 1, dest->elems + id,
675 sizeof (int) * (dest->nelem - id));
676 dest->elems[id++] = src2->elems[i2++];
677 ++dest->nelem;
678 }
679 }
3b0bdc72
UD
680 ++i1;
681 }
682 return REG_NOERROR;
683}
684
a9388965
UD
685/* Calculate the union set of the sets SRC1 and SRC2. And store it to
686 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
687
3b0bdc72
UD
688static reg_errcode_t
689re_node_set_init_union (dest, src1, src2)
690 re_node_set *dest;
691 const re_node_set *src1, *src2;
692{
693 int i1, i2, id;
694 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
695 {
696 dest->alloc = src1->nelem + src2->nelem;
697 dest->elems = re_malloc (int, dest->alloc);
bc15410e 698 if (BE (dest->elems == NULL, 0))
1b2c2628 699 return REG_ESPACE;
3b0bdc72
UD
700 }
701 else
702 {
703 if (src1 != NULL && src1->nelem > 0)
1b2c2628 704 return re_node_set_init_copy (dest, src1);
3b0bdc72 705 else if (src2 != NULL && src2->nelem > 0)
1b2c2628 706 return re_node_set_init_copy (dest, src2);
3b0bdc72 707 else
1b2c2628 708 re_node_set_init_empty (dest);
3b0bdc72
UD
709 return REG_NOERROR;
710 }
711 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
712 {
713 if (src1->elems[i1] > src2->elems[i2])
1b2c2628
UD
714 {
715 dest->elems[id++] = src2->elems[i2++];
716 continue;
717 }
3b0bdc72 718 if (src1->elems[i1] == src2->elems[i2])
1b2c2628 719 ++i2;
3b0bdc72
UD
720 dest->elems[id++] = src1->elems[i1++];
721 }
722 if (i1 < src1->nelem)
723 {
724 memcpy (dest->elems + id, src1->elems + i1,
1b2c2628 725 (src1->nelem - i1) * sizeof (int));
3b0bdc72
UD
726 id += src1->nelem - i1;
727 }
728 else if (i2 < src2->nelem)
729 {
730 memcpy (dest->elems + id, src2->elems + i2,
1b2c2628 731 (src2->nelem - i2) * sizeof (int));
3b0bdc72
UD
732 id += src2->nelem - i2;
733 }
734 dest->nelem = id;
735 return REG_NOERROR;
736}
737
a9388965
UD
738/* Calculate the union set of the sets DEST and SRC. And store it to
739 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
740
3b0bdc72
UD
741static reg_errcode_t
742re_node_set_merge (dest, src)
743 re_node_set *dest;
744 const re_node_set *src;
745{
746 int si, di;
747 if (src == NULL || src->nelem == 0)
748 return REG_NOERROR;
3b0bdc72
UD
749 if (dest->alloc < src->nelem + dest->nelem)
750 {
1b2c2628 751 int *new_buffer;
3b0bdc72 752 dest->alloc = 2 * (src->nelem + dest->alloc);
1b2c2628
UD
753 new_buffer = re_realloc (dest->elems, int, dest->alloc);
754 if (BE (new_buffer == NULL, 0))
755 return REG_ESPACE;
756 dest->elems = new_buffer;
3b0bdc72
UD
757 }
758
759 for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
760 {
761 int cp_from, ncp, mid, right, src_elem = src->elems[si];
762 /* Binary search the spot we will add the new element. */
763 right = dest->nelem;
764 while (di < right)
1b2c2628
UD
765 {
766 mid = (di + right) / 2;
767 if (dest->elems[mid] < src_elem)
768 di = mid + 1;
769 else
770 right = mid;
771 }
3b0bdc72 772 if (di >= dest->nelem)
1b2c2628 773 break;
3b0bdc72
UD
774
775 if (dest->elems[di] == src_elem)
1b2c2628
UD
776 {
777 /* Skip since, DEST already has the element. */
778 ++di;
779 ++si;
780 continue;
781 }
3b0bdc72
UD
782
783 /* Skip the src elements which are less than dest->elems[di]. */
784 cp_from = si;
785 while (si < src->nelem && src->elems[si] < dest->elems[di])
1b2c2628 786 ++si;
3b0bdc72
UD
787 /* Copy these src elements. */
788 ncp = si - cp_from;
789 memmove (dest->elems + di + ncp, dest->elems + di,
1b2c2628 790 sizeof (int) * (dest->nelem - di));
3b0bdc72 791 memcpy (dest->elems + di, src->elems + cp_from,
1b2c2628 792 sizeof (int) * ncp);
3b0bdc72
UD
793 /* Update counters. */
794 di += ncp;
795 dest->nelem += ncp;
796 }
797
798 /* Copy remaining src elements. */
799 if (si < src->nelem)
800 {
801 memcpy (dest->elems + di, src->elems + si,
1b2c2628 802 sizeof (int) * (src->nelem - si));
3b0bdc72
UD
803 dest->nelem += src->nelem - si;
804 }
805 return REG_NOERROR;
806}
807
808/* Insert the new element ELEM to the re_node_set* SET.
809 return 0 if SET already has ELEM,
810 return -1 if an error is occured, return 1 otherwise. */
811
812static int
813re_node_set_insert (set, elem)
814 re_node_set *set;
815 int elem;
816{
817 int idx, right, mid;
818 /* In case of the set is empty. */
819 if (set->elems == NULL || set->alloc == 0)
820 {
bc15410e 821 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1b2c2628 822 return 1;
3b0bdc72 823 else
1b2c2628 824 return -1;
3b0bdc72
UD
825 }
826
827 /* Binary search the spot we will add the new element. */
828 idx = 0;
829 right = set->nelem;
830 while (idx < right)
831 {
832 mid = (idx + right) / 2;
833 if (set->elems[mid] < elem)
1b2c2628 834 idx = mid + 1;
3b0bdc72 835 else
1b2c2628 836 right = mid;
3b0bdc72
UD
837 }
838
839 /* Realloc if we need. */
840 if (set->alloc < set->nelem + 1)
841 {
842 int *new_array;
843 set->alloc = set->alloc * 2;
844 new_array = re_malloc (int, set->alloc);
bc15410e 845 if (BE (new_array == NULL, 0))
1b2c2628 846 return -1;
3b0bdc72
UD
847 /* Copy the elements they are followed by the new element. */
848 if (idx > 0)
1b2c2628 849 memcpy (new_array, set->elems, sizeof (int) * (idx));
3b0bdc72
UD
850 /* Copy the elements which follows the new element. */
851 if (set->nelem - idx > 0)
1b2c2628 852 memcpy (new_array + idx + 1, set->elems + idx,
3b0bdc72 853 sizeof (int) * (set->nelem - idx));
612546c6 854 re_free (set->elems);
3b0bdc72
UD
855 set->elems = new_array;
856 }
857 else
858 {
859 /* Move the elements which follows the new element. */
860 if (set->nelem - idx > 0)
1b2c2628
UD
861 memmove (set->elems + idx + 1, set->elems + idx,
862 sizeof (int) * (set->nelem - idx));
3b0bdc72
UD
863 }
864 /* Insert the new element. */
865 set->elems[idx] = elem;
866 ++set->nelem;
867 return 1;
868}
869
870/* Compare two node sets SET1 and SET2.
871 return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */
872
873static int
874re_node_set_compare (set1, set2)
875 const re_node_set *set1, *set2;
876{
877 int i;
878 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
879 return 0;
880 for (i = 0 ; i < set1->nelem ; i++)
881 if (set1->elems[i] != set2->elems[i])
882 return 0;
883 return 1;
884}
885
0742e48e 886/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
3b0bdc72
UD
887
888static int
889re_node_set_contains (set, elem)
890 const re_node_set *set;
891 int elem;
892{
893 int idx, right, mid;
894 if (set->nelem <= 0)
895 return 0;
896
897 /* Binary search the element. */
898 idx = 0;
899 right = set->nelem - 1;
900 while (idx < right)
901 {
902 mid = (idx + right) / 2;
903 if (set->elems[mid] < elem)
1b2c2628 904 idx = mid + 1;
3b0bdc72 905 else
1b2c2628 906 right = mid;
3b0bdc72 907 }
0742e48e 908 return set->elems[idx] == elem ? idx + 1 : 0;
3b0bdc72
UD
909}
910
911static void
912re_node_set_remove_at (set, idx)
913 re_node_set *set;
914 int idx;
915{
916 if (idx < 0 || idx >= set->nelem)
917 return;
918 if (idx < set->nelem - 1)
919 memmove (set->elems + idx, set->elems + idx + 1,
1b2c2628 920 sizeof (int) * (set->nelem - idx - 1));
3b0bdc72
UD
921 --set->nelem;
922}
923\f
924
925/* Add the token TOKEN to dfa->nodes, and return the index of the token.
926 Or return -1, if an error will be occured. */
927
928static int
929re_dfa_add_node (dfa, token, mode)
930 re_dfa_t *dfa;
931 re_token_t token;
932 int mode;
933{
934 if (dfa->nodes_len >= dfa->nodes_alloc)
935 {
936 re_token_t *new_array;
937 dfa->nodes_alloc *= 2;
938 new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
bc15410e 939 if (BE (new_array == NULL, 0))
1b2c2628 940 return -1;
3b0bdc72 941 else
1b2c2628 942 dfa->nodes = new_array;
3b0bdc72 943 if (mode)
1b2c2628 944 {
a7d5c291 945 int *new_nexts, *new_indices;
1b2c2628
UD
946 re_node_set *new_edests, *new_eclosures, *new_inveclosures;
947
948 new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
a7d5c291 949 new_indices = re_realloc (dfa->org_indices, int, dfa->nodes_alloc);
1b2c2628
UD
950 new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
951 new_eclosures = re_realloc (dfa->eclosures, re_node_set,
952 dfa->nodes_alloc);
953 new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
954 dfa->nodes_alloc);
a7d5c291
UD
955 if (BE (new_nexts == NULL || new_indices == NULL
956 || new_edests == NULL || new_eclosures == NULL
957 || new_inveclosures == NULL, 0))
1b2c2628
UD
958 return -1;
959 dfa->nexts = new_nexts;
a7d5c291 960 dfa->org_indices = new_indices;
1b2c2628
UD
961 dfa->edests = new_edests;
962 dfa->eclosures = new_eclosures;
963 dfa->inveclosures = new_inveclosures;
964 }
3b0bdc72
UD
965 }
966 dfa->nodes[dfa->nodes_len] = token;
967 dfa->nodes[dfa->nodes_len].duplicated = 0;
485d775d 968 dfa->nodes[dfa->nodes_len].constraint = 0;
3b0bdc72
UD
969 return dfa->nodes_len++;
970}
971
972static unsigned int inline
973calc_state_hash (nodes, context)
974 const re_node_set *nodes;
975 unsigned int context;
976{
977 unsigned int hash = nodes->nelem + context;
978 int i;
979 for (i = 0 ; i < nodes->nelem ; i++)
980 hash += nodes->elems[i];
981 return hash;
982}
983
984/* Search for the state whose node_set is equivalent to NODES.
985 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
986 Otherwise create the new one and return it. In case of an error
987 return NULL and set the error code in ERR.
988 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
989 return value is NULL and ERR is REG_NOERROR.
990 - We never return non-NULL value in case of any errors, it is for
991 optimization. */
a9388965
UD
992
993static re_dfastate_t*
994re_acquire_state (err, dfa, nodes)
995 reg_errcode_t *err;
3b0bdc72
UD
996 re_dfa_t *dfa;
997 const re_node_set *nodes;
998{
999 unsigned int hash;
a9388965 1000 re_dfastate_t *new_state;
3b0bdc72
UD
1001 struct re_state_table_entry *spot;
1002 int i;
bc15410e 1003 if (BE (nodes->nelem == 0, 0))
a9388965
UD
1004 {
1005 *err = REG_NOERROR;
1006 return NULL;
1007 }
3b0bdc72
UD
1008 hash = calc_state_hash (nodes, 0);
1009 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1010
bc15410e 1011 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1012 {
bc15410e
UD
1013 re_dfastate_t *state = spot->array[i];
1014 if (hash != state->hash)
1b2c2628 1015 continue;
bc15410e 1016 if (re_node_set_compare (&state->nodes, nodes))
1b2c2628 1017 return state;
3b0bdc72 1018 }
3b0bdc72
UD
1019
1020 /* There are no appropriate state in the dfa, create the new one. */
a9388965 1021 new_state = create_ci_newstate (dfa, nodes, hash);
bc15410e 1022 if (BE (new_state != NULL, 1))
a9388965
UD
1023 return new_state;
1024 else
1025 {
1026 *err = REG_ESPACE;
1027 return NULL;
1028 }
3b0bdc72
UD
1029}
1030
1031/* Search for the state whose node_set is equivalent to NODES and
1032 whose context is equivalent to CONTEXT.
1033 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1034 Otherwise create the new one and return it. In case of an error
1035 return NULL and set the error code in ERR.
1036 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
1037 return value is NULL and ERR is REG_NOERROR.
1038 - We never return non-NULL value in case of any errors, it is for
1039 optimization. */
a9388965
UD
1040
1041static re_dfastate_t*
1042re_acquire_state_context (err, dfa, nodes, context)
1043 reg_errcode_t *err;
3b0bdc72
UD
1044 re_dfa_t *dfa;
1045 const re_node_set *nodes;
1046 unsigned int context;
1047{
1048 unsigned int hash;
a9388965 1049 re_dfastate_t *new_state;
3b0bdc72
UD
1050 struct re_state_table_entry *spot;
1051 int i;
1052 if (nodes->nelem == 0)
a9388965
UD
1053 {
1054 *err = REG_NOERROR;
1055 return NULL;
1056 }
3b0bdc72
UD
1057 hash = calc_state_hash (nodes, context);
1058 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1059
bc15410e 1060 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1061 {
bc15410e
UD
1062 re_dfastate_t *state = spot->array[i];
1063 if (hash != state->hash)
1b2c2628 1064 continue;
bc15410e 1065 if (re_node_set_compare (state->entrance_nodes, nodes)
1b2c2628
UD
1066 && state->context == context)
1067 return state;
3b0bdc72 1068 }
3b0bdc72 1069 /* There are no appropriate state in `dfa', create the new one. */
a9388965 1070 new_state = create_cd_newstate (dfa, nodes, context, hash);
bc15410e 1071 if (BE (new_state != NULL, 1))
a9388965
UD
1072 return new_state;
1073 else
1074 {
1075 *err = REG_ESPACE;
1076 return NULL;
1077 }
3b0bdc72
UD
1078}
1079
a9388965
UD
1080/* Allocate memory for DFA state and initialize common properties.
1081 Return the new state if succeeded, otherwise return NULL. */
1082
3b0bdc72
UD
1083static re_dfastate_t *
1084create_newstate_common (dfa, nodes, hash)
1085 re_dfa_t *dfa;
1086 const re_node_set *nodes;
1087 unsigned int hash;
1088{
1089 re_dfastate_t *newstate;
1b2c2628 1090 reg_errcode_t err;
3b0bdc72 1091 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
bc15410e 1092 if (BE (newstate == NULL, 0))
a9388965 1093 return NULL;
1b2c2628
UD
1094 err = re_node_set_init_copy (&newstate->nodes, nodes);
1095 if (BE (err != REG_NOERROR, 0))
1096 {
1097 re_free (newstate);
1098 return NULL;
1099 }
3b0bdc72
UD
1100 newstate->trtable = NULL;
1101 newstate->trtable_search = NULL;
1102 newstate->hash = hash;
1103 return newstate;
1104}
1105
a9388965
UD
1106/* Store the new state NEWSTATE whose hash value is HASH in appropriate
1107 position. Return value indicate the error code if failed. */
1108
1109static reg_errcode_t
3b0bdc72
UD
1110register_state (dfa, newstate, hash)
1111 re_dfa_t *dfa;
1112 re_dfastate_t *newstate;
1113 unsigned int hash;
1114{
1115 struct re_state_table_entry *spot;
1116 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1117
1118 if (spot->alloc <= spot->num)
1119 {
1b2c2628 1120 re_dfastate_t **new_array;
bc15410e 1121 spot->alloc = 2 * spot->num + 2;
1b2c2628
UD
1122 new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
1123 if (BE (new_array == NULL, 0))
1124 return REG_ESPACE;
1125 spot->array = new_array;
3b0bdc72 1126 }
bc15410e 1127 spot->array[spot->num++] = newstate;
a9388965 1128 return REG_NOERROR;
3b0bdc72
UD
1129}
1130
a9388965
UD
1131/* Create the new state which is independ of contexts.
1132 Return the new state if succeeded, otherwise return NULL. */
1133
3b0bdc72
UD
1134static re_dfastate_t *
1135create_ci_newstate (dfa, nodes, hash)
1136 re_dfa_t *dfa;
1137 const re_node_set *nodes;
1138 unsigned int hash;
1139{
1140 int i;
a9388965 1141 reg_errcode_t err;
3b0bdc72
UD
1142 re_dfastate_t *newstate;
1143 newstate = create_newstate_common (dfa, nodes, hash);
bc15410e 1144 if (BE (newstate == NULL, 0))
a9388965 1145 return NULL;
3b0bdc72
UD
1146 newstate->entrance_nodes = &newstate->nodes;
1147
1148 for (i = 0 ; i < nodes->nelem ; i++)
1149 {
1150 re_token_t *node = dfa->nodes + nodes->elems[i];
1151 re_token_type_t type = node->type;
485d775d 1152 if (type == CHARACTER && !node->constraint)
1b2c2628 1153 continue;
3b0bdc72
UD
1154
1155 /* If the state has the halt node, the state is a halt state. */
1156 else if (type == END_OF_RE)
1b2c2628 1157 newstate->halt = 1;
c0a0f9a3 1158#ifdef RE_ENABLE_I18N
3b0bdc72 1159 else if (type == COMPLEX_BRACKET
1b2c2628
UD
1160 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1161 newstate->accept_mb = 1;
c0a0f9a3 1162#endif /* RE_ENABLE_I18N */
3b0bdc72 1163 else if (type == OP_BACK_REF)
1b2c2628 1164 newstate->has_backref = 1;
485d775d 1165 else if (type == ANCHOR || node->constraint)
1b2c2628 1166 newstate->has_constraint = 1;
3b0bdc72 1167 }
a9388965 1168 err = register_state (dfa, newstate, hash);
1b2c2628
UD
1169 if (BE (err != REG_NOERROR, 0))
1170 {
1171 free_state (newstate);
1172 newstate = NULL;
1173 }
1174 return newstate;
3b0bdc72
UD
1175}
1176
a9388965
UD
1177/* Create the new state which is depend on the context CONTEXT.
1178 Return the new state if succeeded, otherwise return NULL. */
1179
3b0bdc72
UD
1180static re_dfastate_t *
1181create_cd_newstate (dfa, nodes, context, hash)
1182 re_dfa_t *dfa;
1183 const re_node_set *nodes;
1184 unsigned int context, hash;
1185{
1186 int i, nctx_nodes = 0;
a9388965 1187 reg_errcode_t err;
3b0bdc72
UD
1188 re_dfastate_t *newstate;
1189
1190 newstate = create_newstate_common (dfa, nodes, hash);
bc15410e 1191 if (BE (newstate == NULL, 0))
a9388965 1192 return NULL;
3b0bdc72
UD
1193 newstate->context = context;
1194 newstate->entrance_nodes = &newstate->nodes;
1195
1196 for (i = 0 ; i < nodes->nelem ; i++)
1197 {
1198 unsigned int constraint = 0;
1199 re_token_t *node = dfa->nodes + nodes->elems[i];
1200 re_token_type_t type = node->type;
485d775d 1201 if (node->constraint)
1b2c2628 1202 constraint = node->constraint;
3b0bdc72 1203
485d775d 1204 if (type == CHARACTER && !constraint)
1b2c2628 1205 continue;
3b0bdc72
UD
1206 /* If the state has the halt node, the state is a halt state. */
1207 else if (type == END_OF_RE)
1b2c2628 1208 newstate->halt = 1;
c0a0f9a3 1209#ifdef RE_ENABLE_I18N
3b0bdc72 1210 else if (type == COMPLEX_BRACKET
1b2c2628
UD
1211 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1212 newstate->accept_mb = 1;
c0a0f9a3 1213#endif /* RE_ENABLE_I18N */
3b0bdc72 1214 else if (type == OP_BACK_REF)
1b2c2628 1215 newstate->has_backref = 1;
3b0bdc72 1216 else if (type == ANCHOR)
1b2c2628 1217 constraint = node->opr.ctx_type;
3b0bdc72
UD
1218
1219 if (constraint)
1b2c2628
UD
1220 {
1221 if (newstate->entrance_nodes == &newstate->nodes)
1222 {
1223 newstate->entrance_nodes = re_malloc (re_node_set, 1);
bc15410e 1224 if (BE (newstate->entrance_nodes == NULL, 0))
1b2c2628
UD
1225 {
1226 free_state (newstate);
1227 return NULL;
1228 }
1229 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1230 nctx_nodes = 0;
1231 newstate->has_constraint = 1;
1232 }
1233
1234 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1235 {
1236 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1237 ++nctx_nodes;
1238 }
1239 }
3b0bdc72 1240 }
a9388965 1241 err = register_state (dfa, newstate, hash);
1b2c2628
UD
1242 if (BE (err != REG_NOERROR, 0))
1243 {
1244 free_state (newstate);
1245 newstate = NULL;
1246 }
1247 return newstate;
1248}
1249
1250static void
1251free_state (state)
1252 re_dfastate_t *state;
1253{
1254 if (state->entrance_nodes != &state->nodes)
1255 {
1256 re_node_set_free (state->entrance_nodes);
1257 re_free (state->entrance_nodes);
1258 }
1259 re_node_set_free (&state->nodes);
1260 re_free (state->trtable);
1261 re_free (state->trtable_search);
1262 re_free (state);
3b0bdc72 1263}
This page took 0.237527 seconds and 5 git commands to generate.