]> sourceware.org Git - glibc.git/blame - posix/regex.c
(collate_output): Update.
[glibc.git] / posix / regex.c
CommitLineData
2b83a2a4
RM
1/* Extended regular expression matching and search library,
2 version 0.12.
51702635 3 (Implements POSIX draft P1003.2/D11.2, except for some of the
2b83a2a4 4 internationalization features.)
4caef86c 5 Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
2b83a2a4 6
c84142e8
UD
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
2b83a2a4 11
c84142e8
UD
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Library General Public License for more details.
2b83a2a4 16
c84142e8
UD
17 You should have received a copy of the GNU Library General Public
18 License along with the GNU C Library; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
2b83a2a4
RM
21
22/* AIX requires this to be the first thing in the file. */
86187531 23#if defined _AIX && !defined REGEX_MALLOC
2b83a2a4
RM
24 #pragma alloca
25#endif
26
80b55d32 27#undef _GNU_SOURCE
2b83a2a4
RM
28#define _GNU_SOURCE
29
30#ifdef HAVE_CONFIG_H
86187531 31# include <config.h>
2b83a2a4
RM
32#endif
33
07b51ba5
UD
34#ifndef PARAMS
35# if defined __GNUC__ || (defined __STDC__ && __STDC__)
36# define PARAMS(args) args
37# else
38# define PARAMS(args) ()
39# endif /* GCC. */
40#endif /* Not PARAMS. */
41
86187531
UD
42#if defined STDC_HEADERS && !defined emacs
43# include <stddef.h>
4cca6b86 44#else
2b83a2a4 45/* We need this for `regex.h', and perhaps for the Emacs include files. */
86187531 46# include <sys/types.h>
4cca6b86 47#endif
2b83a2a4 48
409dfcea 49#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
7ce241a0 50
51702635
UD
51/* For platform which support the ISO C amendement 1 functionality we
52 support user defined character classes. */
409dfcea 53#if defined _LIBC || WIDE_CHAR_SUPPORT
7ba4fcfc 54/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
51702635 55# include <wchar.h>
7ba4fcfc 56# include <wctype.h>
a9ddb793 57#endif
2ad4fab2 58
a9ddb793 59#ifdef _LIBC
2ad4fab2
UD
60/* We have to keep the namespace clean. */
61# define regfree(preg) __regfree (preg)
62# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
63# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
64# define regerror(errcode, preg, errbuf, errbuf_size) \
65 __regerror(errcode, preg, errbuf, errbuf_size)
66# define re_set_registers(bu, re, nu, st, en) \
67 __re_set_registers (bu, re, nu, st, en)
68# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
69 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
70# define re_match(bufp, string, size, pos, regs) \
71 __re_match (bufp, string, size, pos, regs)
72# define re_search(bufp, string, size, startpos, range, regs) \
73 __re_search (bufp, string, size, startpos, range, regs)
74# define re_compile_pattern(pattern, length, bufp) \
75 __re_compile_pattern (pattern, length, bufp)
76# define re_set_syntax(syntax) __re_set_syntax (syntax)
77# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
78 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
79# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
80
a63a3c2c
UD
81# define btowc __btowc
82
83/* We are also using some library internals. */
84# include <locale/localeinfo.h>
3216711f 85# include <locale/elem-hash.h>
a63a3c2c 86# include <langinfo.h>
51702635
UD
87#endif
88
2b83a2a4 89/* This is for other GNU distributions with internationalized messages. */
86187531 90#if HAVE_LIBINTL_H || defined _LIBC
2b83a2a4
RM
91# include <libintl.h>
92#else
93# define gettext(msgid) (msgid)
94#endif
95
91c7b85d
RM
96#ifndef gettext_noop
97/* This define is so xgettext can find the internationalizable
98 strings. */
86187531 99# define gettext_noop(String) String
91c7b85d
RM
100#endif
101
2b83a2a4
RM
102/* The `emacs' switch turns on certain matching commands
103 that make sense only in Emacs. */
104#ifdef emacs
105
86187531
UD
106# include "lisp.h"
107# include "buffer.h"
108# include "syntax.h"
2b83a2a4
RM
109
110#else /* not emacs */
111
112/* If we are not linking with Emacs proper,
113 we can't use the relocating allocator
114 even if config.h says that we can. */
86187531 115# undef REL_ALLOC
2b83a2a4 116
86187531
UD
117# if defined STDC_HEADERS || defined _LIBC
118# include <stdlib.h>
119# else
2b83a2a4
RM
120char *malloc ();
121char *realloc ();
86187531 122# endif
2b83a2a4 123
5bf62f2d
RM
124/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
125 If nothing else has been done, use the method below. */
86187531
UD
126# ifdef INHIBIT_STRING_HEADER
127# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
128# if !defined bzero && !defined bcopy
129# undef INHIBIT_STRING_HEADER
130# endif
131# endif
132# endif
5bf62f2d
RM
133
134/* This is the normal way of making sure we have a bcopy and a bzero.
135 This is used in most programs--a few other programs avoid this
136 by defining INHIBIT_STRING_HEADER. */
86187531
UD
137# ifndef INHIBIT_STRING_HEADER
138# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
139# include <string.h>
2ad4fab2
UD
140# ifndef bzero
141# ifndef _LIBC
142# define bzero(s, n) (memset (s, '\0', n), (s))
143# else
144# define bzero(s, n) __bzero (s, n)
145# endif
86187531
UD
146# endif
147# else
148# include <strings.h>
149# ifndef memcmp
150# define memcmp(s1, s2, n) bcmp (s1, s2, n)
151# endif
152# ifndef memcpy
153# define memcpy(d, s, n) (bcopy (s, d, n), (d))
154# endif
155# endif
156# endif
2b83a2a4
RM
157
158/* Define the syntax stuff for \<, \>, etc. */
159
160/* This must be nonzero for the wordchar and notwordchar pattern
161 commands in re_match_2. */
86187531
UD
162# ifndef Sword
163# define Sword 1
164# endif
2b83a2a4 165
86187531
UD
166# ifdef SWITCH_ENUM_BUG
167# define SWITCH_ENUM_CAST(x) ((int)(x))
168# else
169# define SWITCH_ENUM_CAST(x) (x)
170# endif
2b83a2a4 171
2b83a2a4
RM
172#endif /* not emacs */
173\f
174/* Get the interface, including the syntax bits. */
5c2a0669 175#include <regex.h>
2b83a2a4
RM
176
177/* isalpha etc. are used for the character classes. */
178#include <ctype.h>
179
180/* Jim Meyering writes:
181
182 "... Some ctype macros are valid only for character codes that
183 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
184 using /bin/cc or gcc but without giving an ansi option). So, all
185 ctype uses should be through macros like ISPRINT... If
186 STDC_HEADERS is defined, then autoconf has verified that the ctype
187 macros don't need to be guarded with references to isascii. ...
188 Defining isascii to 1 should let any compiler worth its salt
06698672
UD
189 eliminate the && through constant folding."
190 Solaris defines some of these symbols so we must undefine them first. */
2b83a2a4 191
06698672 192#undef ISASCII
86187531
UD
193#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
194# define ISASCII(c) 1
2b83a2a4 195#else
86187531 196# define ISASCII(c) isascii(c)
2b83a2a4
RM
197#endif
198
199#ifdef isblank
86187531 200# define ISBLANK(c) (ISASCII (c) && isblank (c))
2b83a2a4 201#else
86187531 202# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
2b83a2a4
RM
203#endif
204#ifdef isgraph
86187531 205# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
2b83a2a4 206#else
86187531 207# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
2b83a2a4
RM
208#endif
209
06698672 210#undef ISPRINT
2b83a2a4
RM
211#define ISPRINT(c) (ISASCII (c) && isprint (c))
212#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
213#define ISALNUM(c) (ISASCII (c) && isalnum (c))
214#define ISALPHA(c) (ISASCII (c) && isalpha (c))
215#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
216#define ISLOWER(c) (ISASCII (c) && islower (c))
217#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
218#define ISSPACE(c) (ISASCII (c) && isspace (c))
219#define ISUPPER(c) (ISASCII (c) && isupper (c))
220#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
221
4caef86c
UD
222#ifdef _tolower
223# define TOLOWER(c) _tolower(c)
224#else
225# define TOLOWER(c) tolower(c)
226#endif
227
2b83a2a4 228#ifndef NULL
86187531 229# define NULL (void *)0
2b83a2a4
RM
230#endif
231
232/* We remove any previous definition of `SIGN_EXTEND_CHAR',
233 since ours (we hope) works properly with all combinations of
234 machines, compilers, `char' and `unsigned char' argument types.
235 (Per Bothner suggested the basic approach.) */
236#undef SIGN_EXTEND_CHAR
237#if __STDC__
86187531 238# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
2b83a2a4
RM
239#else /* not __STDC__ */
240/* As in Harbison and Steele. */
86187531 241# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
2b83a2a4
RM
242#endif
243\f
2864e767
UD
244#ifndef emacs
245/* How many characters in the character set. */
246# define CHAR_SET_SIZE 256
247
248# ifdef SYNTAX_TABLE
249
250extern char *re_syntax_table;
251
252# else /* not SYNTAX_TABLE */
253
254static char re_syntax_table[CHAR_SET_SIZE];
255
256static void
257init_syntax_once ()
258{
259 register int c;
260 static int done = 0;
261
262 if (done)
263 return;
264 bzero (re_syntax_table, sizeof re_syntax_table);
265
266 for (c = 0; c < CHAR_SET_SIZE; ++c)
267 if (ISALNUM (c))
268 re_syntax_table[c] = Sword;
269
270 re_syntax_table['_'] = Sword;
271
272 done = 1;
273}
274
275# endif /* not SYNTAX_TABLE */
276
7186e974 277# define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
2864e767
UD
278
279#endif /* emacs */
280\f
2b83a2a4
RM
281/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
282 use `alloca' instead of `malloc'. This is because using malloc in
283 re_search* or re_match* could cause memory leaks when C-g is used in
284 Emacs; also, malloc is slower and causes storage fragmentation. On
91c7b85d
RM
285 the other hand, malloc is more portable, and easier to debug.
286
2b83a2a4
RM
287 Because we sometimes use alloca, some routines have to be macros,
288 not functions -- `alloca'-allocated space disappears at the end of the
289 function it is called in. */
290
291#ifdef REGEX_MALLOC
292
86187531
UD
293# define REGEX_ALLOCATE malloc
294# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
295# define REGEX_FREE free
2b83a2a4
RM
296
297#else /* not REGEX_MALLOC */
298
299/* Emacs already defines alloca, sometimes. */
86187531 300# ifndef alloca
2b83a2a4
RM
301
302/* Make alloca work the best possible way. */
86187531
UD
303# ifdef __GNUC__
304# define alloca __builtin_alloca
305# else /* not __GNUC__ */
306# if HAVE_ALLOCA_H
307# include <alloca.h>
308# endif /* HAVE_ALLOCA_H */
309# endif /* not __GNUC__ */
2b83a2a4 310
86187531 311# endif /* not alloca */
2b83a2a4 312
86187531 313# define REGEX_ALLOCATE alloca
2b83a2a4
RM
314
315/* Assumes a `char *destination' variable. */
86187531 316# define REGEX_REALLOCATE(source, osize, nsize) \
2b83a2a4 317 (destination = (char *) alloca (nsize), \
86187531 318 memcpy (destination, source, osize))
2b83a2a4
RM
319
320/* No need to do anything to free, after alloca. */
86187531 321# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
2b83a2a4
RM
322
323#endif /* not REGEX_MALLOC */
324
325/* Define how to allocate the failure stack. */
326
86187531 327#if defined REL_ALLOC && defined REGEX_MALLOC
ff48a63c 328
86187531 329# define REGEX_ALLOCATE_STACK(size) \
2b83a2a4 330 r_alloc (&failure_stack_ptr, (size))
86187531 331# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
2b83a2a4 332 r_re_alloc (&failure_stack_ptr, (nsize))
86187531 333# define REGEX_FREE_STACK(ptr) \
2b83a2a4
RM
334 r_alloc_free (&failure_stack_ptr)
335
ff48a63c 336#else /* not using relocating allocator */
2b83a2a4 337
86187531 338# ifdef REGEX_MALLOC
2b83a2a4 339
86187531
UD
340# define REGEX_ALLOCATE_STACK malloc
341# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
342# define REGEX_FREE_STACK free
2b83a2a4 343
86187531 344# else /* not REGEX_MALLOC */
2b83a2a4 345
86187531 346# define REGEX_ALLOCATE_STACK alloca
2b83a2a4 347
86187531 348# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
2b83a2a4
RM
349 REGEX_REALLOCATE (source, osize, nsize)
350/* No need to explicitly free anything. */
86187531 351# define REGEX_FREE_STACK(arg)
2b83a2a4 352
86187531 353# endif /* not REGEX_MALLOC */
ff48a63c 354#endif /* not using relocating allocator */
2b83a2a4
RM
355
356
357/* True if `size1' is non-NULL and PTR is pointing anywhere inside
358 `string1' or just past its end. This works if PTR is NULL, which is
359 a good thing. */
360#define FIRST_STRING_P(ptr) \
361 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
362
363/* (Re)Allocate N items of type T using malloc, or fail. */
364#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
365#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
366#define RETALLOC_IF(addr, n, t) \
367 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
368#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
369
370#define BYTEWIDTH 8 /* In bits. */
371
372#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
373
374#undef MAX
375#undef MIN
376#define MAX(a, b) ((a) > (b) ? (a) : (b))
377#define MIN(a, b) ((a) < (b) ? (a) : (b))
378
379typedef char boolean;
380#define false 0
381#define true 1
382
07b51ba5
UD
383static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
384 const char *string1, int size1,
385 const char *string2, int size2,
386 int pos,
387 struct re_registers *regs,
388 int stop));
2b83a2a4
RM
389\f
390/* These are the command codes that appear in compiled regular
391 expressions. Some opcodes are followed by argument bytes. A
392 command code can specify any interpretation whatsoever for its
393 arguments. Zero bytes may appear in the compiled regular expression. */
394
395typedef enum
396{
397 no_op = 0,
398
399 /* Succeed right away--no more backtracking. */
400 succeed,
401
402 /* Followed by one byte giving n, then by n literal bytes. */
403 exactn,
404
405 /* Matches any (more or less) character. */
406 anychar,
407
408 /* Matches any one char belonging to specified set. First
409 following byte is number of bitmap bytes. Then come bytes
410 for a bitmap saying which chars are in. Bits in each byte
411 are ordered low-bit-first. A character is in the set if its
412 bit is 1. A character too large to have a bit in the map is
413 automatically not in the set. */
414 charset,
415
416 /* Same parameters as charset, but match any character that is
417 not one of those specified. */
418 charset_not,
419
420 /* Start remembering the text that is matched, for storing in a
421 register. Followed by one byte with the register number, in
422 the range 0 to one less than the pattern buffer's re_nsub
423 field. Then followed by one byte with the number of groups
424 inner to this one. (This last has to be part of the
425 start_memory only because we need it in the on_failure_jump
426 of re_match_2.) */
427 start_memory,
428
429 /* Stop remembering the text that is matched and store it in a
430 memory register. Followed by one byte with the register
431 number, in the range 0 to one less than `re_nsub' in the
432 pattern buffer, and one byte with the number of inner groups,
433 just like `start_memory'. (We need the number of inner
434 groups here because we don't have any easy way of finding the
435 corresponding start_memory when we're at a stop_memory.) */
436 stop_memory,
437
438 /* Match a duplicate of something remembered. Followed by one
439 byte containing the register number. */
440 duplicate,
441
442 /* Fail unless at beginning of line. */
443 begline,
444
445 /* Fail unless at end of line. */
446 endline,
447
448 /* Succeeds if at beginning of buffer (if emacs) or at beginning
449 of string to be matched (if not). */
450 begbuf,
451
452 /* Analogously, for end of buffer/string. */
453 endbuf,
91c7b85d 454
2b83a2a4 455 /* Followed by two byte relative address to which to jump. */
91c7b85d 456 jump,
2b83a2a4
RM
457
458 /* Same as jump, but marks the end of an alternative. */
459 jump_past_alt,
460
461 /* Followed by two-byte relative address of place to resume at
462 in case of failure. */
463 on_failure_jump,
91c7b85d 464
2b83a2a4
RM
465 /* Like on_failure_jump, but pushes a placeholder instead of the
466 current string position when executed. */
467 on_failure_keep_string_jump,
91c7b85d 468
2b83a2a4
RM
469 /* Throw away latest failure point and then jump to following
470 two-byte relative address. */
471 pop_failure_jump,
472
473 /* Change to pop_failure_jump if know won't have to backtrack to
474 match; otherwise change to jump. This is used to jump
475 back to the beginning of a repeat. If what follows this jump
476 clearly won't match what the repeat does, such that we can be
477 sure that there is no use backtracking out of repetitions
478 already matched, then we change it to a pop_failure_jump.
479 Followed by two-byte address. */
480 maybe_pop_jump,
481
482 /* Jump to following two-byte address, and push a dummy failure
483 point. This failure point will be thrown away if an attempt
484 is made to use it for a failure. A `+' construct makes this
485 before the first repeat. Also used as an intermediary kind
486 of jump when compiling an alternative. */
487 dummy_failure_jump,
488
489 /* Push a dummy failure point and continue. Used at the end of
490 alternatives. */
491 push_dummy_failure,
492
493 /* Followed by two-byte relative address and two-byte number n.
494 After matching N times, jump to the address upon failure. */
495 succeed_n,
496
497 /* Followed by two-byte relative address, and two-byte number n.
498 Jump to the address N times, then fail. */
499 jump_n,
500
501 /* Set the following two-byte relative address to the
502 subsequent two-byte number. The address *includes* the two
503 bytes of number. */
504 set_number_at,
505
506 wordchar, /* Matches any word-constituent character. */
507 notwordchar, /* Matches any char that is not a word-constituent. */
508
509 wordbeg, /* Succeeds if at word beginning. */
510 wordend, /* Succeeds if at word end. */
511
512 wordbound, /* Succeeds if at a word boundary. */
513 notwordbound /* Succeeds if not at a word boundary. */
514
515#ifdef emacs
516 ,before_dot, /* Succeeds if before point. */
517 at_dot, /* Succeeds if at point. */
518 after_dot, /* Succeeds if after point. */
519
520 /* Matches any character whose syntax is specified. Followed by
521 a byte which contains a syntax code, e.g., Sword. */
522 syntaxspec,
523
524 /* Matches any character whose syntax is not that specified. */
525 notsyntaxspec
526#endif /* emacs */
527} re_opcode_t;
528\f
529/* Common operations on the compiled pattern. */
530
531/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
532
533#define STORE_NUMBER(destination, number) \
534 do { \
535 (destination)[0] = (number) & 0377; \
536 (destination)[1] = (number) >> 8; \
537 } while (0)
538
539/* Same as STORE_NUMBER, except increment DESTINATION to
540 the byte after where the number is stored. Therefore, DESTINATION
541 must be an lvalue. */
542
543#define STORE_NUMBER_AND_INCR(destination, number) \
544 do { \
545 STORE_NUMBER (destination, number); \
546 (destination) += 2; \
547 } while (0)
548
549/* Put into DESTINATION a number stored in two contiguous bytes starting
550 at SOURCE. */
551
552#define EXTRACT_NUMBER(destination, source) \
553 do { \
554 (destination) = *(source) & 0377; \
555 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
556 } while (0)
557
558#ifdef DEBUG
4cca6b86 559static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
2b83a2a4
RM
560static void
561extract_number (dest, source)
562 int *dest;
563 unsigned char *source;
564{
91c7b85d 565 int temp = SIGN_EXTEND_CHAR (*(source + 1));
2b83a2a4
RM
566 *dest = *source & 0377;
567 *dest += temp << 8;
568}
569
86187531
UD
570# ifndef EXTRACT_MACROS /* To debug the macros. */
571# undef EXTRACT_NUMBER
572# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
573# endif /* not EXTRACT_MACROS */
2b83a2a4
RM
574
575#endif /* DEBUG */
576
577/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
578 SOURCE must be an lvalue. */
579
580#define EXTRACT_NUMBER_AND_INCR(destination, source) \
581 do { \
582 EXTRACT_NUMBER (destination, source); \
583 (source) += 2; \
584 } while (0)
585
586#ifdef DEBUG
4cca6b86
UD
587static void extract_number_and_incr _RE_ARGS ((int *destination,
588 unsigned char **source));
2b83a2a4
RM
589static void
590extract_number_and_incr (destination, source)
591 int *destination;
592 unsigned char **source;
91c7b85d 593{
2b83a2a4
RM
594 extract_number (destination, *source);
595 *source += 2;
596}
597
86187531
UD
598# ifndef EXTRACT_MACROS
599# undef EXTRACT_NUMBER_AND_INCR
600# define EXTRACT_NUMBER_AND_INCR(dest, src) \
2b83a2a4 601 extract_number_and_incr (&dest, &src)
86187531 602# endif /* not EXTRACT_MACROS */
2b83a2a4
RM
603
604#endif /* DEBUG */
605\f
606/* If DEBUG is defined, Regex prints many voluminous messages about what
607 it is doing (if the variable `debug' is nonzero). If linked with the
608 main program in `iregex.c', you can enter patterns and strings
609 interactively. And if linked with the main program in `main.c' and
610 the other test files, you can run the already-written tests. */
611
612#ifdef DEBUG
613
614/* We use standard I/O for debugging. */
86187531 615# include <stdio.h>
2b83a2a4
RM
616
617/* It is useful to test things that ``must'' be true when debugging. */
86187531 618# include <assert.h>
2b83a2a4 619
c4563d2d 620static int debug;
2b83a2a4 621
86187531
UD
622# define DEBUG_STATEMENT(e) e
623# define DEBUG_PRINT1(x) if (debug) printf (x)
624# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
625# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
626# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
627# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
2b83a2a4 628 if (debug) print_partial_compiled_pattern (s, e)
86187531 629# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
2b83a2a4
RM
630 if (debug) print_double_string (w, s1, sz1, s2, sz2)
631
632
633/* Print the fastmap in human-readable form. */
634
635void
636print_fastmap (fastmap)
637 char *fastmap;
638{
639 unsigned was_a_range = 0;
91c7b85d
RM
640 unsigned i = 0;
641
2b83a2a4
RM
642 while (i < (1 << BYTEWIDTH))
643 {
644 if (fastmap[i++])
645 {
646 was_a_range = 0;
647 putchar (i - 1);
648 while (i < (1 << BYTEWIDTH) && fastmap[i])
649 {
650 was_a_range = 1;
651 i++;
652 }
653 if (was_a_range)
654 {
655 printf ("-");
656 putchar (i - 1);
657 }
658 }
659 }
91c7b85d 660 putchar ('\n');
2b83a2a4
RM
661}
662
663
664/* Print a compiled pattern string in human-readable form, starting at
665 the START pointer into it and ending just before the pointer END. */
666
667void
668print_partial_compiled_pattern (start, end)
669 unsigned char *start;
670 unsigned char *end;
671{
672 int mcnt, mcnt2;
5929563f 673 unsigned char *p1;
2b83a2a4
RM
674 unsigned char *p = start;
675 unsigned char *pend = end;
676
677 if (start == NULL)
678 {
679 printf ("(null)\n");
680 return;
681 }
91c7b85d 682
2b83a2a4
RM
683 /* Loop over pattern commands. */
684 while (p < pend)
685 {
686 printf ("%d:\t", p - start);
687
688 switch ((re_opcode_t) *p++)
689 {
690 case no_op:
691 printf ("/no_op");
692 break;
693
694 case exactn:
695 mcnt = *p++;
696 printf ("/exactn/%d", mcnt);
697 do
698 {
699 putchar ('/');
700 putchar (*p++);
701 }
702 while (--mcnt);
703 break;
704
705 case start_memory:
706 mcnt = *p++;
707 printf ("/start_memory/%d/%d", mcnt, *p++);
708 break;
709
710 case stop_memory:
711 mcnt = *p++;
712 printf ("/stop_memory/%d/%d", mcnt, *p++);
713 break;
714
715 case duplicate:
716 printf ("/duplicate/%d", *p++);
717 break;
718
719 case anychar:
720 printf ("/anychar");
721 break;
722
723 case charset:
724 case charset_not:
725 {
726 register int c, last = -100;
727 register int in_range = 0;
728
729 printf ("/charset [%s",
730 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
91c7b85d 731
2b83a2a4
RM
732 assert (p + *p < pend);
733
734 for (c = 0; c < 256; c++)
735 if (c / 8 < *p
736 && (p[1 + (c/8)] & (1 << (c % 8))))
737 {
738 /* Are we starting a range? */
739 if (last + 1 == c && ! in_range)
740 {
741 putchar ('-');
742 in_range = 1;
743 }
744 /* Have we broken a range? */
745 else if (last + 1 != c && in_range)
746 {
747 putchar (last);
748 in_range = 0;
749 }
91c7b85d 750
2b83a2a4
RM
751 if (! in_range)
752 putchar (c);
753
754 last = c;
755 }
756
757 if (in_range)
758 putchar (last);
759
760 putchar (']');
761
762 p += 1 + *p;
763 }
764 break;
765
766 case begline:
767 printf ("/begline");
768 break;
769
770 case endline:
771 printf ("/endline");
772 break;
773
774 case on_failure_jump:
775 extract_number_and_incr (&mcnt, &p);
776 printf ("/on_failure_jump to %d", p + mcnt - start);
777 break;
778
779 case on_failure_keep_string_jump:
780 extract_number_and_incr (&mcnt, &p);
781 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
782 break;
783
784 case dummy_failure_jump:
785 extract_number_and_incr (&mcnt, &p);
786 printf ("/dummy_failure_jump to %d", p + mcnt - start);
787 break;
788
789 case push_dummy_failure:
790 printf ("/push_dummy_failure");
791 break;
91c7b85d 792
2b83a2a4
RM
793 case maybe_pop_jump:
794 extract_number_and_incr (&mcnt, &p);
795 printf ("/maybe_pop_jump to %d", p + mcnt - start);
796 break;
797
798 case pop_failure_jump:
799 extract_number_and_incr (&mcnt, &p);
800 printf ("/pop_failure_jump to %d", p + mcnt - start);
91c7b85d
RM
801 break;
802
2b83a2a4
RM
803 case jump_past_alt:
804 extract_number_and_incr (&mcnt, &p);
805 printf ("/jump_past_alt to %d", p + mcnt - start);
91c7b85d
RM
806 break;
807
2b83a2a4
RM
808 case jump:
809 extract_number_and_incr (&mcnt, &p);
810 printf ("/jump to %d", p + mcnt - start);
811 break;
812
91c7b85d 813 case succeed_n:
2b83a2a4 814 extract_number_and_incr (&mcnt, &p);
5929563f 815 p1 = p + mcnt;
2b83a2a4 816 extract_number_and_incr (&mcnt2, &p);
5929563f 817 printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
2b83a2a4 818 break;
91c7b85d
RM
819
820 case jump_n:
2b83a2a4 821 extract_number_and_incr (&mcnt, &p);
5929563f 822 p1 = p + mcnt;
2b83a2a4 823 extract_number_and_incr (&mcnt2, &p);
5929563f 824 printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
2b83a2a4 825 break;
91c7b85d
RM
826
827 case set_number_at:
2b83a2a4 828 extract_number_and_incr (&mcnt, &p);
5929563f 829 p1 = p + mcnt;
2b83a2a4 830 extract_number_and_incr (&mcnt2, &p);
5929563f 831 printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
2b83a2a4 832 break;
91c7b85d 833
2b83a2a4
RM
834 case wordbound:
835 printf ("/wordbound");
836 break;
837
838 case notwordbound:
839 printf ("/notwordbound");
840 break;
841
842 case wordbeg:
843 printf ("/wordbeg");
844 break;
91c7b85d 845
2b83a2a4
RM
846 case wordend:
847 printf ("/wordend");
91c7b85d 848
86187531 849# ifdef emacs
2b83a2a4
RM
850 case before_dot:
851 printf ("/before_dot");
852 break;
853
854 case at_dot:
855 printf ("/at_dot");
856 break;
857
858 case after_dot:
859 printf ("/after_dot");
860 break;
861
862 case syntaxspec:
863 printf ("/syntaxspec");
864 mcnt = *p++;
865 printf ("/%d", mcnt);
866 break;
91c7b85d 867
2b83a2a4
RM
868 case notsyntaxspec:
869 printf ("/notsyntaxspec");
870 mcnt = *p++;
871 printf ("/%d", mcnt);
872 break;
86187531 873# endif /* emacs */
2b83a2a4
RM
874
875 case wordchar:
876 printf ("/wordchar");
877 break;
91c7b85d 878
2b83a2a4
RM
879 case notwordchar:
880 printf ("/notwordchar");
881 break;
882
883 case begbuf:
884 printf ("/begbuf");
885 break;
886
887 case endbuf:
888 printf ("/endbuf");
889 break;
890
891 default:
892 printf ("?%d", *(p-1));
893 }
894
895 putchar ('\n');
896 }
897
898 printf ("%d:\tend of pattern.\n", p - start);
899}
900
901
902void
903print_compiled_pattern (bufp)
904 struct re_pattern_buffer *bufp;
905{
906 unsigned char *buffer = bufp->buffer;
907
908 print_partial_compiled_pattern (buffer, buffer + bufp->used);
5929563f
UD
909 printf ("%ld bytes used/%ld bytes allocated.\n",
910 bufp->used, bufp->allocated);
2b83a2a4
RM
911
912 if (bufp->fastmap_accurate && bufp->fastmap)
913 {
914 printf ("fastmap: ");
915 print_fastmap (bufp->fastmap);
916 }
917
918 printf ("re_nsub: %d\t", bufp->re_nsub);
919 printf ("regs_alloc: %d\t", bufp->regs_allocated);
920 printf ("can_be_null: %d\t", bufp->can_be_null);
921 printf ("newline_anchor: %d\n", bufp->newline_anchor);
922 printf ("no_sub: %d\t", bufp->no_sub);
923 printf ("not_bol: %d\t", bufp->not_bol);
924 printf ("not_eol: %d\t", bufp->not_eol);
5929563f 925 printf ("syntax: %lx\n", bufp->syntax);
2b83a2a4
RM
926 /* Perhaps we should print the translate table? */
927}
928
929
930void
931print_double_string (where, string1, size1, string2, size2)
932 const char *where;
933 const char *string1;
934 const char *string2;
935 int size1;
936 int size2;
937{
5929563f 938 int this_char;
91c7b85d 939
2b83a2a4
RM
940 if (where == NULL)
941 printf ("(null)");
942 else
943 {
944 if (FIRST_STRING_P (where))
945 {
946 for (this_char = where - string1; this_char < size1; this_char++)
947 putchar (string1[this_char]);
948
91c7b85d 949 where = string2;
2b83a2a4
RM
950 }
951
952 for (this_char = where - string2; this_char < size2; this_char++)
953 putchar (string2[this_char]);
954 }
955}
956
4cca6b86
UD
957void
958printchar (c)
959 int c;
960{
961 putc (c, stderr);
962}
963
2b83a2a4
RM
964#else /* not DEBUG */
965
86187531
UD
966# undef assert
967# define assert(e)
2b83a2a4 968
86187531
UD
969# define DEBUG_STATEMENT(e)
970# define DEBUG_PRINT1(x)
971# define DEBUG_PRINT2(x1, x2)
972# define DEBUG_PRINT3(x1, x2, x3)
973# define DEBUG_PRINT4(x1, x2, x3, x4)
974# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
975# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
2b83a2a4
RM
976
977#endif /* not DEBUG */
978\f
979/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
980 also be assigned to arbitrarily: each pattern buffer stores its own
981 syntax, so it can be changed between regex compilations. */
b9337b6a
UD
982/* This has no initializer because initialized variables in Emacs
983 become read-only after dumping. */
2b83a2a4
RM
984reg_syntax_t re_syntax_options;
985
986
987/* Specify the precise syntax of regexps for compilation. This provides
988 for compatibility for various utilities which historically have
989 different, incompatible syntaxes.
990
991 The argument SYNTAX is a bit mask comprised of the various bits
992 defined in regex.h. We return the old syntax. */
993
994reg_syntax_t
995re_set_syntax (syntax)
996 reg_syntax_t syntax;
997{
998 reg_syntax_t ret = re_syntax_options;
91c7b85d 999
2b83a2a4 1000 re_syntax_options = syntax;
51702635
UD
1001#ifdef DEBUG
1002 if (syntax & RE_DEBUG)
1003 debug = 1;
1004 else if (debug) /* was on but now is not */
1005 debug = 0;
1006#endif /* DEBUG */
2b83a2a4
RM
1007 return ret;
1008}
2ad4fab2
UD
1009#ifdef _LIBC
1010weak_alias (__re_set_syntax, re_set_syntax)
1011#endif
2b83a2a4
RM
1012\f
1013/* This table gives an error message for each of the error codes listed
1014 in regex.h. Obviously the order here has to be same as there.
1015 POSIX doesn't require that we do anything for REG_NOERROR,
1016 but why not be nice? */
1017
c4563d2d 1018static const char re_error_msgid[] =
91c7b85d 1019 {
c4563d2d
UD
1020#define REG_NOERROR_IDX 0
1021 gettext_noop ("Success") /* REG_NOERROR */
1022 "\0"
1023#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
1024 gettext_noop ("No match") /* REG_NOMATCH */
1025 "\0"
1026#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
1027 gettext_noop ("Invalid regular expression") /* REG_BADPAT */
1028 "\0"
1029#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
d0738b5d 1030 gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
c4563d2d
UD
1031 "\0"
1032#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
1033 gettext_noop ("Invalid character class name") /* REG_ECTYPE */
1034 "\0"
1035#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
1036 gettext_noop ("Trailing backslash") /* REG_EESCAPE */
1037 "\0"
1038#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
1039 gettext_noop ("Invalid back reference") /* REG_ESUBREG */
1040 "\0"
1041#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
1042 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
1043 "\0"
1044#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
1045 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
1046 "\0"
1047#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
1048 gettext_noop ("Unmatched \\{") /* REG_EBRACE */
1049 "\0"
1050#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
1051 gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
1052 "\0"
1053#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
1054 gettext_noop ("Invalid range end") /* REG_ERANGE */
1055 "\0"
1056#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
1057 gettext_noop ("Memory exhausted") /* REG_ESPACE */
1058 "\0"
1059#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
1060 gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
1061 "\0"
1062#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
1063 gettext_noop ("Premature end of regular expression") /* REG_EEND */
1064 "\0"
1065#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
1066 gettext_noop ("Regular expression too big") /* REG_ESIZE */
1067 "\0"
1068#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
d0738b5d 1069 gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
2b83a2a4 1070 };
c4563d2d
UD
1071
1072static const size_t re_error_msgid_idx[] =
1073 {
1074 REG_NOERROR_IDX,
1075 REG_NOMATCH_IDX,
1076 REG_BADPAT_IDX,
1077 REG_ECOLLATE_IDX,
1078 REG_ECTYPE_IDX,
1079 REG_EESCAPE_IDX,
1080 REG_ESUBREG_IDX,
1081 REG_EBRACK_IDX,
1082 REG_EPAREN_IDX,
1083 REG_EBRACE_IDX,
1084 REG_BADBR_IDX,
1085 REG_ERANGE_IDX,
1086 REG_ESPACE_IDX,
1087 REG_BADRPT_IDX,
1088 REG_EEND_IDX,
1089 REG_ESIZE_IDX,
1090 REG_ERPAREN_IDX
1091 };
2b83a2a4
RM
1092\f
1093/* Avoiding alloca during matching, to placate r_alloc. */
1094
1095/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1096 searching and matching functions should not call alloca. On some
1097 systems, alloca is implemented in terms of malloc, and if we're
1098 using the relocating allocator routines, then malloc could cause a
1099 relocation, which might (if the strings being searched are in the
1100 ralloc heap) shift the data out from underneath the regexp
1101 routines.
1102
91c7b85d 1103 Here's another reason to avoid allocation: Emacs
2b83a2a4
RM
1104 processes input from X in a signal handler; processing X input may
1105 call malloc; if input arrives while a matching routine is calling
1106 malloc, then we're scrod. But Emacs can't just block input while
1107 calling matching routines; then we don't notice interrupts when
1108 they come in. So, Emacs blocks input around all regexp calls
1109 except the matching calls, which it leaves unprotected, in the
1110 faith that they will not malloc. */
1111
1112/* Normally, this is fine. */
1113#define MATCH_MAY_ALLOCATE
1114
1115/* When using GNU C, we are not REALLY using the C alloca, no matter
1116 what config.h may say. So don't take precautions for it. */
1117#ifdef __GNUC__
86187531 1118# undef C_ALLOCA
2b83a2a4
RM
1119#endif
1120
1121/* The match routines may not allocate if (1) they would do it with malloc
1122 and (2) it's not safe for them to use malloc.
1123 Note that if REL_ALLOC is defined, matching would not use malloc for the
1124 failure stack, but we would still use it for the register vectors;
1125 so REL_ALLOC should not affect this. */
86187531
UD
1126#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1127# undef MATCH_MAY_ALLOCATE
2b83a2a4
RM
1128#endif
1129
1130\f
1131/* Failure stack declarations and macros; both re_compile_fastmap and
1132 re_match_2 use a failure stack. These have to be macros because of
1133 REGEX_ALLOCATE_STACK. */
91c7b85d 1134
2b83a2a4
RM
1135
1136/* Number of failure points for which to initially allocate space
1137 when matching. If this number is exceeded, we allocate more
1138 space, so it is not a hard limit. */
1139#ifndef INIT_FAILURE_ALLOC
86187531 1140# define INIT_FAILURE_ALLOC 5
2b83a2a4
RM
1141#endif
1142
1143/* Roughly the maximum number of failure points on the stack. Would be
51702635 1144 exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
2b83a2a4
RM
1145 This is a variable only so users of regex can assign to it; we never
1146 change it ourselves. */
4cca6b86
UD
1147
1148#ifdef INT_IS_16BIT
1149
86187531 1150# if defined MATCH_MAY_ALLOCATE
51702635
UD
1151/* 4400 was enough to cause a crash on Alpha OSF/1,
1152 whose default stack limit is 2mb. */
1153long int re_max_failures = 4000;
86187531 1154# else
51702635 1155long int re_max_failures = 2000;
86187531 1156# endif
4cca6b86
UD
1157
1158union fail_stack_elt
1159{
1160 unsigned char *pointer;
51702635 1161 long int integer;
4cca6b86
UD
1162};
1163
1164typedef union fail_stack_elt fail_stack_elt_t;
1165
1166typedef struct
1167{
1168 fail_stack_elt_t *stack;
51702635
UD
1169 unsigned long int size;
1170 unsigned long int avail; /* Offset of next open position. */
4cca6b86
UD
1171} fail_stack_type;
1172
1173#else /* not INT_IS_16BIT */
1174
86187531 1175# if defined MATCH_MAY_ALLOCATE
5f0e6fc7 1176/* 4400 was enough to cause a crash on Alpha OSF/1,
710f7bab 1177 whose default stack limit is 2mb. */
51702635 1178int re_max_failures = 20000;
86187531 1179# else
2b83a2a4 1180int re_max_failures = 2000;
86187531 1181# endif
2b83a2a4
RM
1182
1183union fail_stack_elt
1184{
1185 unsigned char *pointer;
1186 int integer;
1187};
1188
1189typedef union fail_stack_elt fail_stack_elt_t;
1190
1191typedef struct
1192{
1193 fail_stack_elt_t *stack;
1194 unsigned size;
1195 unsigned avail; /* Offset of next open position. */
1196} fail_stack_type;
1197
4cca6b86
UD
1198#endif /* INT_IS_16BIT */
1199
2b83a2a4
RM
1200#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
1201#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1202#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1203
1204
1205/* Define macros to initialize and free the failure stack.
1206 Do `return -2' if the alloc fails. */
1207
1208#ifdef MATCH_MAY_ALLOCATE
86187531 1209# define INIT_FAIL_STACK() \
2b83a2a4
RM
1210 do { \
1211 fail_stack.stack = (fail_stack_elt_t *) \
86187531 1212 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
2b83a2a4
RM
1213 \
1214 if (fail_stack.stack == NULL) \
1215 return -2; \
1216 \
1217 fail_stack.size = INIT_FAILURE_ALLOC; \
1218 fail_stack.avail = 0; \
1219 } while (0)
1220
86187531 1221# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
2b83a2a4 1222#else
86187531 1223# define INIT_FAIL_STACK() \
2b83a2a4
RM
1224 do { \
1225 fail_stack.avail = 0; \
1226 } while (0)
1227
86187531 1228# define RESET_FAIL_STACK()
2b83a2a4
RM
1229#endif
1230
1231
1232/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1233
1234 Return 1 if succeeds, and 0 if either ran out of memory
91c7b85d
RM
1235 allocating space for it or it was already too large.
1236
2b83a2a4
RM
1237 REGEX_REALLOCATE_STACK requires `destination' be declared. */
1238
1239#define DOUBLE_FAIL_STACK(fail_stack) \
cccda09f 1240 ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
2b83a2a4
RM
1241 ? 0 \
1242 : ((fail_stack).stack = (fail_stack_elt_t *) \
1243 REGEX_REALLOCATE_STACK ((fail_stack).stack, \
1244 (fail_stack).size * sizeof (fail_stack_elt_t), \
1245 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
1246 \
1247 (fail_stack).stack == NULL \
1248 ? 0 \
1249 : ((fail_stack).size <<= 1, \
1250 1)))
1251
1252
91c7b85d 1253/* Push pointer POINTER on FAIL_STACK.
2b83a2a4
RM
1254 Return 1 if was able to do so and 0 if ran out of memory allocating
1255 space to do so. */
1256#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
1257 ((FAIL_STACK_FULL () \
1258 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
1259 ? 0 \
1260 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1261 1))
1262
1263/* Push a pointer value onto the failure stack.
1264 Assumes the variable `fail_stack'. Probably should only
1265 be called from within `PUSH_FAILURE_POINT'. */
1266#define PUSH_FAILURE_POINTER(item) \
1267 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1268
1269/* This pushes an integer-valued item onto the failure stack.
1270 Assumes the variable `fail_stack'. Probably should only
1271 be called from within `PUSH_FAILURE_POINT'. */
1272#define PUSH_FAILURE_INT(item) \
1273 fail_stack.stack[fail_stack.avail++].integer = (item)
1274
1275/* Push a fail_stack_elt_t value onto the failure stack.
1276 Assumes the variable `fail_stack'. Probably should only
1277 be called from within `PUSH_FAILURE_POINT'. */
1278#define PUSH_FAILURE_ELT(item) \
1279 fail_stack.stack[fail_stack.avail++] = (item)
1280
1281/* These three POP... operations complement the three PUSH... operations.
1282 All assume that `fail_stack' is nonempty. */
1283#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1284#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1285#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1286
1287/* Used to omit pushing failure point id's when we're not debugging. */
1288#ifdef DEBUG
86187531
UD
1289# define DEBUG_PUSH PUSH_FAILURE_INT
1290# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
2b83a2a4 1291#else
86187531
UD
1292# define DEBUG_PUSH(item)
1293# define DEBUG_POP(item_addr)
2b83a2a4
RM
1294#endif
1295
1296
1297/* Push the information about the state we will need
91c7b85d
RM
1298 if we ever fail back to it.
1299
2b83a2a4 1300 Requires variables fail_stack, regstart, regend, reg_info, and
789b13c4
UD
1301 num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
1302 be declared.
91c7b85d 1303
2b83a2a4
RM
1304 Does `return FAILURE_CODE' if runs out of memory. */
1305
1306#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
1307 do { \
1308 char *destination; \
1309 /* Must be int, so when we don't save any registers, the arithmetic \
1310 of 0 + -1 isn't done as unsigned. */ \
4cca6b86
UD
1311 /* Can't be int, since there is not a shred of a guarantee that int \
1312 is wide enough to hold a value of something to which pointer can \
1313 be assigned */ \
bca973bc 1314 active_reg_t this_reg; \
2b83a2a4
RM
1315 \
1316 DEBUG_STATEMENT (failure_id++); \
1317 DEBUG_STATEMENT (nfailure_points_pushed++); \
1318 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1319 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
1320 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1321 \
bca973bc 1322 DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
2b83a2a4
RM
1323 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
1324 \
1325 /* Ensure we have enough space allocated for what we will push. */ \
1326 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
1327 { \
1328 if (!DOUBLE_FAIL_STACK (fail_stack)) \
1329 return failure_code; \
1330 \
1331 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
1332 (fail_stack).size); \
1333 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1334 } \
1335 \
1336 /* Push the info, starting with the registers. */ \
1337 DEBUG_PRINT1 ("\n"); \
1338 \
3bbceb12 1339 if (1) \
537257ae
MB
1340 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1341 this_reg++) \
1342 { \
bca973bc 1343 DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
537257ae 1344 DEBUG_STATEMENT (num_regs_pushed++); \
2b83a2a4 1345 \
bca973bc 1346 DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
537257ae 1347 PUSH_FAILURE_POINTER (regstart[this_reg]); \
2b83a2a4 1348 \
bca973bc 1349 DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
537257ae
MB
1350 PUSH_FAILURE_POINTER (regend[this_reg]); \
1351 \
bca973bc
UD
1352 DEBUG_PRINT2 (" info: %p\n ", \
1353 reg_info[this_reg].word.pointer); \
537257ae
MB
1354 DEBUG_PRINT2 (" match_null=%d", \
1355 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
1356 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
1357 DEBUG_PRINT2 (" matched_something=%d", \
1358 MATCHED_SOMETHING (reg_info[this_reg])); \
1359 DEBUG_PRINT2 (" ever_matched=%d", \
1360 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
1361 DEBUG_PRINT1 ("\n"); \
1362 PUSH_FAILURE_ELT (reg_info[this_reg].word); \
1363 } \
2b83a2a4 1364 \
bca973bc 1365 DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
2b83a2a4
RM
1366 PUSH_FAILURE_INT (lowest_active_reg); \
1367 \
bca973bc 1368 DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
2b83a2a4
RM
1369 PUSH_FAILURE_INT (highest_active_reg); \
1370 \
bca973bc 1371 DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
2b83a2a4
RM
1372 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1373 PUSH_FAILURE_POINTER (pattern_place); \
1374 \
bca973bc 1375 DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
2b83a2a4
RM
1376 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1377 size2); \
1378 DEBUG_PRINT1 ("'\n"); \
1379 PUSH_FAILURE_POINTER (string_place); \
1380 \
1381 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
1382 DEBUG_PUSH (failure_id); \
1383 } while (0)
1384
1385/* This is the number of items that are pushed and popped on the stack
1386 for each register. */
1387#define NUM_REG_ITEMS 3
1388
1389/* Individual items aside from the registers. */
1390#ifdef DEBUG
86187531 1391# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
2b83a2a4 1392#else
86187531 1393# define NUM_NONREG_ITEMS 4
2b83a2a4
RM
1394#endif
1395
1396/* We push at most this many items on the stack. */
a641835a
RM
1397/* We used to use (num_regs - 1), which is the number of registers
1398 this regexp will save; but that was changed to 5
1399 to avoid stack overflow for a regexp with lots of parens. */
1400#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2b83a2a4
RM
1401
1402/* We actually push this many items. */
537257ae 1403#define NUM_FAILURE_ITEMS \
3bbceb12 1404 (((0 \
537257ae
MB
1405 ? 0 : highest_active_reg - lowest_active_reg + 1) \
1406 * NUM_REG_ITEMS) \
1407 + NUM_NONREG_ITEMS)
2b83a2a4
RM
1408
1409/* How many items can still be added to the stack without overflowing it. */
1410#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1411
1412
1413/* Pops what PUSH_FAIL_STACK pushes.
1414
1415 We restore into the parameters, all of which should be lvalues:
1416 STR -- the saved data position.
1417 PAT -- the saved pattern position.
1418 LOW_REG, HIGH_REG -- the highest and lowest active registers.
1419 REGSTART, REGEND -- arrays of string positions.
1420 REG_INFO -- array of information about each subexpression.
91c7b85d 1421
2b83a2a4
RM
1422 Also assumes the variables `fail_stack' and (if debugging), `bufp',
1423 `pend', `string1', `size1', `string2', and `size2'. */
1424
1425#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1426{ \
bca973bc
UD
1427 DEBUG_STATEMENT (unsigned failure_id;) \
1428 active_reg_t this_reg; \
2b83a2a4
RM
1429 const unsigned char *string_temp; \
1430 \
1431 assert (!FAIL_STACK_EMPTY ()); \
1432 \
1433 /* Remove failure points and point to how many regs pushed. */ \
1434 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1435 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
1436 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
1437 \
1438 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
1439 \
1440 DEBUG_POP (&failure_id); \
1441 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
1442 \
1443 /* If the saved string location is NULL, it came from an \
1444 on_failure_keep_string_jump opcode, and we want to throw away the \
1445 saved NULL, thus retaining our current position in the string. */ \
1446 string_temp = POP_FAILURE_POINTER (); \
1447 if (string_temp != NULL) \
1448 str = (const char *) string_temp; \
1449 \
bca973bc 1450 DEBUG_PRINT2 (" Popping string %p: `", str); \
2b83a2a4
RM
1451 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1452 DEBUG_PRINT1 ("'\n"); \
1453 \
1454 pat = (unsigned char *) POP_FAILURE_POINTER (); \
bca973bc 1455 DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
2b83a2a4
RM
1456 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1457 \
1458 /* Restore register info. */ \
4cca6b86 1459 high_reg = (active_reg_t) POP_FAILURE_INT (); \
bca973bc 1460 DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
2b83a2a4 1461 \
4cca6b86 1462 low_reg = (active_reg_t) POP_FAILURE_INT (); \
bca973bc 1463 DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
2b83a2a4 1464 \
3bbceb12 1465 if (1) \
537257ae
MB
1466 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1467 { \
bca973bc 1468 DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
2b83a2a4 1469 \
537257ae 1470 reg_info[this_reg].word = POP_FAILURE_ELT (); \
bca973bc
UD
1471 DEBUG_PRINT2 (" info: %p\n", \
1472 reg_info[this_reg].word.pointer); \
2b83a2a4 1473 \
537257ae 1474 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
bca973bc 1475 DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
2b83a2a4 1476 \
537257ae 1477 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
bca973bc 1478 DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
537257ae 1479 } \
57aefafe
RM
1480 else \
1481 { \
1482 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1483 { \
3bbceb12 1484 reg_info[this_reg].word.integer = 0; \
57aefafe
RM
1485 regend[this_reg] = 0; \
1486 regstart[this_reg] = 0; \
1487 } \
1488 highest_active_reg = high_reg; \
1489 } \
2b83a2a4
RM
1490 \
1491 set_regs_matched_done = 0; \
1492 DEBUG_STATEMENT (nfailure_points_popped++); \
1493} /* POP_FAILURE_POINT */
1494
1495
1496\f
1497/* Structure for per-register (a.k.a. per-group) information.
1498 Other register information, such as the
1499 starting and ending positions (which are addresses), and the list of
1500 inner groups (which is a bits list) are maintained in separate
91c7b85d
RM
1501 variables.
1502
2b83a2a4
RM
1503 We are making a (strictly speaking) nonportable assumption here: that
1504 the compiler will pack our bit fields into something that fits into
1505 the type of `word', i.e., is something that fits into one item on the
1506 failure stack. */
1507
4cca6b86
UD
1508
1509/* Declarations and macros for re_match_2. */
1510
2b83a2a4
RM
1511typedef union
1512{
1513 fail_stack_elt_t word;
1514 struct
1515 {
1516 /* This field is one if this group can match the empty string,
1517 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
1518#define MATCH_NULL_UNSET_VALUE 3
1519 unsigned match_null_string_p : 2;
1520 unsigned is_active : 1;
1521 unsigned matched_something : 1;
1522 unsigned ever_matched_something : 1;
1523 } bits;
1524} register_info_type;
1525
1526#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
1527#define IS_ACTIVE(R) ((R).bits.is_active)
1528#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
1529#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
1530
1531
1532/* Call this when have matched a real character; it sets `matched' flags
1533 for the subexpressions which we are currently inside. Also records
1534 that those subexprs have matched. */
1535#define SET_REGS_MATCHED() \
1536 do \
1537 { \
1538 if (!set_regs_matched_done) \
1539 { \
4cca6b86 1540 active_reg_t r; \
2b83a2a4
RM
1541 set_regs_matched_done = 1; \
1542 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
1543 { \
1544 MATCHED_SOMETHING (reg_info[r]) \
1545 = EVER_MATCHED_SOMETHING (reg_info[r]) \
1546 = 1; \
1547 } \
1548 } \
1549 } \
1550 while (0)
1551
1552/* Registers are set to a sentinel when they haven't yet matched. */
1553static char reg_unset_dummy;
1554#define REG_UNSET_VALUE (&reg_unset_dummy)
1555#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1556\f
1557/* Subroutine declarations and macros for regex_compile. */
1558
4cca6b86
UD
1559static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1560 reg_syntax_t syntax,
1561 struct re_pattern_buffer *bufp));
1562static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1563static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1564 int arg1, int arg2));
1565static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1566 int arg, unsigned char *end));
1567static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1568 int arg1, int arg2, unsigned char *end));
1569static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1570 reg_syntax_t syntax));
1571static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1572 reg_syntax_t syntax));
ac8295d2
UD
1573static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
1574 const char **p_ptr,
4cca6b86
UD
1575 const char *pend,
1576 char *translate,
1577 reg_syntax_t syntax,
1578 unsigned char *b));
2b83a2a4 1579
91c7b85d 1580/* Fetch the next character in the uncompiled pattern---translating it
2b83a2a4
RM
1581 if necessary. Also cast from a signed character in the constant
1582 string passed to us by the user to an unsigned char that we can use
1583 as an array index (in, e.g., `translate'). */
03a75825 1584#ifndef PATFETCH
86187531 1585# define PATFETCH(c) \
2b83a2a4
RM
1586 do {if (p == pend) return REG_EEND; \
1587 c = (unsigned char) *p++; \
03a75825 1588 if (translate) c = (unsigned char) translate[c]; \
2b83a2a4 1589 } while (0)
03a75825 1590#endif
2b83a2a4
RM
1591
1592/* Fetch the next character in the uncompiled pattern, with no
1593 translation. */
1594#define PATFETCH_RAW(c) \
1595 do {if (p == pend) return REG_EEND; \
1596 c = (unsigned char) *p++; \
1597 } while (0)
1598
1599/* Go backwards one character in the pattern. */
1600#define PATUNFETCH p--
1601
1602
1603/* If `translate' is non-null, return translate[D], else just D. We
1604 cast the subscript to translate because some data is declared as
1605 `char *', to avoid warnings when a string constant is passed. But
1606 when we use a character as a subscript we must make it unsigned. */
03a75825 1607#ifndef TRANSLATE
86187531 1608# define TRANSLATE(d) \
03a75825
RM
1609 (translate ? (char) translate[(unsigned char) (d)] : (d))
1610#endif
2b83a2a4
RM
1611
1612
1613/* Macros for outputting the compiled pattern into `buffer'. */
1614
1615/* If the buffer isn't allocated when it comes in, use this. */
1616#define INIT_BUF_SIZE 32
1617
1618/* Make sure we have at least N more bytes of space in buffer. */
1619#define GET_BUFFER_SPACE(n) \
cccda09f 1620 while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
2b83a2a4
RM
1621 EXTEND_BUFFER ()
1622
1623/* Make sure we have one more byte of buffer space and then add C to it. */
1624#define BUF_PUSH(c) \
1625 do { \
1626 GET_BUFFER_SPACE (1); \
1627 *b++ = (unsigned char) (c); \
1628 } while (0)
1629
1630
1631/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
1632#define BUF_PUSH_2(c1, c2) \
1633 do { \
1634 GET_BUFFER_SPACE (2); \
1635 *b++ = (unsigned char) (c1); \
1636 *b++ = (unsigned char) (c2); \
1637 } while (0)
1638
1639
1640/* As with BUF_PUSH_2, except for three bytes. */
1641#define BUF_PUSH_3(c1, c2, c3) \
1642 do { \
1643 GET_BUFFER_SPACE (3); \
1644 *b++ = (unsigned char) (c1); \
1645 *b++ = (unsigned char) (c2); \
1646 *b++ = (unsigned char) (c3); \
1647 } while (0)
1648
1649
1650/* Store a jump with opcode OP at LOC to location TO. We store a
1651 relative address offset by the three bytes the jump itself occupies. */
1652#define STORE_JUMP(op, loc, to) \
4cca6b86 1653 store_op1 (op, loc, (int) ((to) - (loc) - 3))
2b83a2a4
RM
1654
1655/* Likewise, for a two-argument jump. */
1656#define STORE_JUMP2(op, loc, to, arg) \
4cca6b86 1657 store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
2b83a2a4
RM
1658
1659/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
1660#define INSERT_JUMP(op, loc, to) \
4cca6b86 1661 insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
2b83a2a4
RM
1662
1663/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
1664#define INSERT_JUMP2(op, loc, to, arg) \
4cca6b86 1665 insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
2b83a2a4
RM
1666
1667
1668/* This is not an arbitrary limit: the arguments which represent offsets
1669 into the pattern are two bytes long. So if 2^16 bytes turns out to
1670 be too small, many things would have to change. */
4cca6b86
UD
1671/* Any other compiler which, like MSC, has allocation limit below 2^16
1672 bytes will have to use approach similar to what was done below for
1673 MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
1674 reallocating to 0 bytes. Such thing is not going to work too well.
1675 You have been warned!! */
86187531 1676#if defined _MSC_VER && !defined WIN32
4cca6b86
UD
1677/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1678 The REALLOC define eliminates a flurry of conversion warnings,
1679 but is not required. */
86187531
UD
1680# define MAX_BUF_SIZE 65500L
1681# define REALLOC(p,s) realloc ((p), (size_t) (s))
4cca6b86 1682#else
86187531
UD
1683# define MAX_BUF_SIZE (1L << 16)
1684# define REALLOC(p,s) realloc ((p), (s))
4cca6b86 1685#endif
2b83a2a4
RM
1686
1687/* Extend the buffer by twice its current size via realloc and
1688 reset the pointers that pointed into the old block to point to the
1689 correct places in the new one. If extending the buffer results in it
1690 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
1691#define EXTEND_BUFFER() \
1692 do { \
1693 unsigned char *old_buffer = bufp->buffer; \
1694 if (bufp->allocated == MAX_BUF_SIZE) \
1695 return REG_ESIZE; \
1696 bufp->allocated <<= 1; \
1697 if (bufp->allocated > MAX_BUF_SIZE) \
1698 bufp->allocated = MAX_BUF_SIZE; \
4cca6b86 1699 bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
2b83a2a4
RM
1700 if (bufp->buffer == NULL) \
1701 return REG_ESPACE; \
1702 /* If the buffer moved, move all the pointers into it. */ \
1703 if (old_buffer != bufp->buffer) \
1704 { \
1705 b = (b - old_buffer) + bufp->buffer; \
1706 begalt = (begalt - old_buffer) + bufp->buffer; \
1707 if (fixup_alt_jump) \
1708 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1709 if (laststart) \
1710 laststart = (laststart - old_buffer) + bufp->buffer; \
1711 if (pending_exact) \
1712 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
1713 } \
1714 } while (0)
1715
1716
1717/* Since we have one byte reserved for the register number argument to
1718 {start,stop}_memory, the maximum number of groups we can report
1719 things about is what fits in that byte. */
1720#define MAX_REGNUM 255
1721
1722/* But patterns can have more than `MAX_REGNUM' registers. We just
1723 ignore the excess. */
1724typedef unsigned regnum_t;
1725
1726
1727/* Macros for the compile stack. */
1728
1729/* Since offsets can go either forwards or backwards, this type needs to
1730 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
4cca6b86
UD
1731/* int may be not enough when sizeof(int) == 2. */
1732typedef long pattern_offset_t;
2b83a2a4
RM
1733
1734typedef struct
1735{
1736 pattern_offset_t begalt_offset;
1737 pattern_offset_t fixup_alt_jump;
1738 pattern_offset_t inner_group_offset;
91c7b85d 1739 pattern_offset_t laststart_offset;
2b83a2a4
RM
1740 regnum_t regnum;
1741} compile_stack_elt_t;
1742
1743
1744typedef struct
1745{
1746 compile_stack_elt_t *stack;
1747 unsigned size;
1748 unsigned avail; /* Offset of next open position. */
1749} compile_stack_type;
1750
1751
1752#define INIT_COMPILE_STACK_SIZE 32
1753
1754#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1755#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1756
1757/* The next available element. */
1758#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1759
1760
1761/* Set the bit for character C in a list. */
1762#define SET_LIST_BIT(c) \
1763 (b[((unsigned char) (c)) / BYTEWIDTH] \
1764 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1765
1766
1767/* Get the next unsigned number in the uncompiled pattern. */
1768#define GET_UNSIGNED_NUMBER(num) \
1769 { if (p != pend) \
1770 { \
1771 PATFETCH (c); \
1772 while (ISDIGIT (c)) \
1773 { \
1774 if (num < 0) \
1775 num = 0; \
1776 num = num * 10 + c - '0'; \
1777 if (p == pend) \
1778 break; \
1779 PATFETCH (c); \
1780 } \
1781 } \
91c7b85d 1782 }
2b83a2a4 1783
409dfcea 1784#if defined _LIBC || WIDE_CHAR_SUPPORT
51702635
UD
1785/* The GNU C library provides support for user-defined character classes
1786 and the functions from ISO C amendement 1. */
1787# ifdef CHARCLASS_NAME_MAX
1788# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1789# else
1790/* This shouldn't happen but some implementation might still have this
1791 problem. Use a reasonable default value. */
1792# define CHAR_CLASS_MAX_LENGTH 256
1793# endif
1794
2ad4fab2
UD
1795# ifdef _LIBC
1796# define IS_CHAR_CLASS(string) __wctype (string)
1797# else
1798# define IS_CHAR_CLASS(string) wctype (string)
1799# endif
51702635
UD
1800#else
1801# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
2b83a2a4 1802
51702635 1803# define IS_CHAR_CLASS(string) \
2b83a2a4
RM
1804 (STREQ (string, "alpha") || STREQ (string, "upper") \
1805 || STREQ (string, "lower") || STREQ (string, "digit") \
1806 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
1807 || STREQ (string, "space") || STREQ (string, "print") \
1808 || STREQ (string, "punct") || STREQ (string, "graph") \
1809 || STREQ (string, "cntrl") || STREQ (string, "blank"))
51702635 1810#endif
2b83a2a4
RM
1811\f
1812#ifndef MATCH_MAY_ALLOCATE
1813
1814/* If we cannot allocate large objects within re_match_2_internal,
1815 we make the fail stack and register vectors global.
1816 The fail stack, we grow to the maximum size when a regexp
1817 is compiled.
1818 The register vectors, we adjust in size each time we
1819 compile a regexp, according to the number of registers it needs. */
1820
1821static fail_stack_type fail_stack;
1822
1823/* Size with which the following vectors are currently allocated.
1824 That is so we can make them bigger as needed,
1825 but never make them smaller. */
1826static int regs_allocated_size;
1827
1828static const char ** regstart, ** regend;
1829static const char ** old_regstart, ** old_regend;
1830static const char **best_regstart, **best_regend;
91c7b85d 1831static register_info_type *reg_info;
2b83a2a4
RM
1832static const char **reg_dummy;
1833static register_info_type *reg_info_dummy;
1834
1835/* Make the register vectors big enough for NUM_REGS registers,
1836 but don't make them smaller. */
1837
1838static
1839regex_grow_registers (num_regs)
1840 int num_regs;
1841{
1842 if (num_regs > regs_allocated_size)
1843 {
1844 RETALLOC_IF (regstart, num_regs, const char *);
1845 RETALLOC_IF (regend, num_regs, const char *);
1846 RETALLOC_IF (old_regstart, num_regs, const char *);
1847 RETALLOC_IF (old_regend, num_regs, const char *);
1848 RETALLOC_IF (best_regstart, num_regs, const char *);
1849 RETALLOC_IF (best_regend, num_regs, const char *);
1850 RETALLOC_IF (reg_info, num_regs, register_info_type);
1851 RETALLOC_IF (reg_dummy, num_regs, const char *);
1852 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1853
1854 regs_allocated_size = num_regs;
1855 }
1856}
1857
1858#endif /* not MATCH_MAY_ALLOCATE */
1859\f
4cca6b86
UD
1860static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1861 compile_stack,
1862 regnum_t regnum));
1863
2b83a2a4
RM
1864/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1865 Returns one of error codes defined in `regex.h', or zero for success.
1866
1867 Assumes the `allocated' (and perhaps `buffer') and `translate'
1868 fields are set in BUFP on entry.
1869
1870 If it succeeds, results are put in BUFP (if it returns an error, the
1871 contents of BUFP are undefined):
1872 `buffer' is the compiled pattern;
1873 `syntax' is set to SYNTAX;
1874 `used' is set to the length of the compiled pattern;
1875 `fastmap_accurate' is zero;
1876 `re_nsub' is the number of subexpressions in PATTERN;
1877 `not_bol' and `not_eol' are zero;
91c7b85d 1878
2b83a2a4
RM
1879 The `fastmap' and `newline_anchor' fields are neither
1880 examined nor set. */
1881
1882/* Return, freeing storage we allocated. */
1883#define FREE_STACK_RETURN(value) \
1884 return (free (compile_stack.stack), value)
1885
1886static reg_errcode_t
1887regex_compile (pattern, size, syntax, bufp)
1888 const char *pattern;
4cca6b86 1889 size_t size;
2b83a2a4
RM
1890 reg_syntax_t syntax;
1891 struct re_pattern_buffer *bufp;
1892{
1893 /* We fetch characters from PATTERN here. Even though PATTERN is
1894 `char *' (i.e., signed), we declare these variables as unsigned, so
1895 they can be reliably used as array indices. */
1896 register unsigned char c, c1;
91c7b85d 1897
2b83a2a4
RM
1898 /* A random temporary spot in PATTERN. */
1899 const char *p1;
1900
1901 /* Points to the end of the buffer, where we should append. */
1902 register unsigned char *b;
91c7b85d 1903
2b83a2a4
RM
1904 /* Keeps track of unclosed groups. */
1905 compile_stack_type compile_stack;
1906
1907 /* Points to the current (ending) position in the pattern. */
1908 const char *p = pattern;
1909 const char *pend = pattern + size;
91c7b85d 1910
2b83a2a4 1911 /* How to translate the characters in the pattern. */
03a75825 1912 RE_TRANSLATE_TYPE translate = bufp->translate;
2b83a2a4
RM
1913
1914 /* Address of the count-byte of the most recently inserted `exactn'
1915 command. This makes it possible to tell if a new exact-match
1916 character can be added to that command or if the character requires
1917 a new `exactn' command. */
1918 unsigned char *pending_exact = 0;
1919
1920 /* Address of start of the most recently finished expression.
1921 This tells, e.g., postfix * where to find the start of its
1922 operand. Reset at the beginning of groups and alternatives. */
1923 unsigned char *laststart = 0;
1924
1925 /* Address of beginning of regexp, or inside of last group. */
1926 unsigned char *begalt;
1927
1928 /* Place in the uncompiled pattern (i.e., the {) to
1929 which to go back if the interval is invalid. */
1930 const char *beg_interval;
91c7b85d 1931
2b83a2a4
RM
1932 /* Address of the place where a forward jump should go to the end of
1933 the containing expression. Each alternative of an `or' -- except the
1934 last -- ends with a forward jump of this sort. */
1935 unsigned char *fixup_alt_jump = 0;
1936
1937 /* Counts open-groups as they are encountered. Remembered for the
1938 matching close-group on the compile stack, so the same register
1939 number is put in the stop_memory as the start_memory. */
1940 regnum_t regnum = 0;
1941
1942#ifdef DEBUG
1943 DEBUG_PRINT1 ("\nCompiling pattern: ");
1944 if (debug)
1945 {
1946 unsigned debug_count;
91c7b85d 1947
2b83a2a4
RM
1948 for (debug_count = 0; debug_count < size; debug_count++)
1949 putchar (pattern[debug_count]);
1950 putchar ('\n');
1951 }
1952#endif /* DEBUG */
1953
1954 /* Initialize the compile stack. */
1955 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1956 if (compile_stack.stack == NULL)
1957 return REG_ESPACE;
1958
1959 compile_stack.size = INIT_COMPILE_STACK_SIZE;
1960 compile_stack.avail = 0;
1961
1962 /* Initialize the pattern buffer. */
1963 bufp->syntax = syntax;
1964 bufp->fastmap_accurate = 0;
1965 bufp->not_bol = bufp->not_eol = 0;
1966
1967 /* Set `used' to zero, so that if we return an error, the pattern
1968 printer (for debugging) will think there's no pattern. We reset it
1969 at the end. */
1970 bufp->used = 0;
91c7b85d 1971
2b83a2a4 1972 /* Always count groups, whether or not bufp->no_sub is set. */
91c7b85d 1973 bufp->re_nsub = 0;
2b83a2a4 1974
86187531 1975#if !defined emacs && !defined SYNTAX_TABLE
2b83a2a4
RM
1976 /* Initialize the syntax table. */
1977 init_syntax_once ();
1978#endif
1979
1980 if (bufp->allocated == 0)
1981 {
1982 if (bufp->buffer)
1983 { /* If zero allocated, but buffer is non-null, try to realloc
1984 enough space. This loses if buffer's address is bogus, but
1985 that is the user's responsibility. */
1986 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1987 }
1988 else
1989 { /* Caller did not allocate a buffer. Do it for them. */
1990 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1991 }
1992 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1993
1994 bufp->allocated = INIT_BUF_SIZE;
1995 }
1996
1997 begalt = b = bufp->buffer;
1998
1999 /* Loop through the uncompiled pattern until we're at the end. */
2000 while (p != pend)
2001 {
2002 PATFETCH (c);
2003
2004 switch (c)
2005 {
2006 case '^':
2007 {
2008 if ( /* If at start of pattern, it's an operator. */
2009 p == pattern + 1
2010 /* If context independent, it's an operator. */
2011 || syntax & RE_CONTEXT_INDEP_ANCHORS
2012 /* Otherwise, depends on what's come before. */
2013 || at_begline_loc_p (pattern, p, syntax))
2014 BUF_PUSH (begline);
2015 else
2016 goto normal_char;
2017 }
2018 break;
2019
2020
2021 case '$':
2022 {
2023 if ( /* If at end of pattern, it's an operator. */
91c7b85d 2024 p == pend
2b83a2a4
RM
2025 /* If context independent, it's an operator. */
2026 || syntax & RE_CONTEXT_INDEP_ANCHORS
2027 /* Otherwise, depends on what's next. */
2028 || at_endline_loc_p (p, pend, syntax))
2029 BUF_PUSH (endline);
2030 else
2031 goto normal_char;
2032 }
2033 break;
2034
2035
2036 case '+':
2037 case '?':
2038 if ((syntax & RE_BK_PLUS_QM)
2039 || (syntax & RE_LIMITED_OPS))
2040 goto normal_char;
2041 handle_plus:
2042 case '*':
2043 /* If there is no previous pattern... */
2044 if (!laststart)
2045 {
2046 if (syntax & RE_CONTEXT_INVALID_OPS)
2047 FREE_STACK_RETURN (REG_BADRPT);
2048 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2049 goto normal_char;
2050 }
2051
2052 {
2053 /* Are we optimizing this jump? */
2054 boolean keep_string_p = false;
91c7b85d 2055
2b83a2a4
RM
2056 /* 1 means zero (many) matches is allowed. */
2057 char zero_times_ok = 0, many_times_ok = 0;
2058
2059 /* If there is a sequence of repetition chars, collapse it
2060 down to just one (the right one). We can't combine
2061 interval operators with these because of, e.g., `a{2}*',
2062 which should only match an even number of `a's. */
2063
2064 for (;;)
2065 {
2066 zero_times_ok |= c != '+';
2067 many_times_ok |= c != '?';
2068
2069 if (p == pend)
2070 break;
2071
2072 PATFETCH (c);
2073
2074 if (c == '*'
2075 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2076 ;
2077
2078 else if (syntax & RE_BK_PLUS_QM && c == '\\')
2079 {
2080 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2081
2082 PATFETCH (c1);
2083 if (!(c1 == '+' || c1 == '?'))
2084 {
2085 PATUNFETCH;
2086 PATUNFETCH;
2087 break;
2088 }
2089
2090 c = c1;
2091 }
2092 else
2093 {
2094 PATUNFETCH;
2095 break;
2096 }
2097
2098 /* If we get here, we found another repeat character. */
2099 }
2100
2101 /* Star, etc. applied to an empty pattern is equivalent
2102 to an empty pattern. */
91c7b85d 2103 if (!laststart)
2b83a2a4
RM
2104 break;
2105
2106 /* Now we know whether or not zero matches is allowed
2107 and also whether or not two or more matches is allowed. */
2108 if (many_times_ok)
2109 { /* More than one repetition is allowed, so put in at the
2110 end a backward relative jump from `b' to before the next
2111 jump we're going to put in below (which jumps from
91c7b85d 2112 laststart to after this jump).
2b83a2a4
RM
2113
2114 But if we are at the `*' in the exact sequence `.*\n',
2115 insert an unconditional jump backwards to the .,
2116 instead of the beginning of the loop. This way we only
2117 push a failure point once, instead of every time
2118 through the loop. */
2119 assert (p - 1 > pattern);
2120
2121 /* Allocate the space for the jump. */
2122 GET_BUFFER_SPACE (3);
2123
2124 /* We know we are not at the first character of the pattern,
2125 because laststart was nonzero. And we've already
2126 incremented `p', by the way, to be the character after
2127 the `*'. Do we have to do something analogous here
2128 for null bytes, because of RE_DOT_NOT_NULL? */
2129 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2130 && zero_times_ok
2131 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2132 && !(syntax & RE_DOT_NEWLINE))
2133 { /* We have .*\n. */
2134 STORE_JUMP (jump, b, laststart);
2135 keep_string_p = true;
2136 }
2137 else
2138 /* Anything else. */
2139 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2140
2141 /* We've added more stuff to the buffer. */
2142 b += 3;
2143 }
2144
2145 /* On failure, jump from laststart to b + 3, which will be the
2146 end of the buffer after this jump is inserted. */
2147 GET_BUFFER_SPACE (3);
2148 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2149 : on_failure_jump,
2150 laststart, b + 3);
2151 pending_exact = 0;
2152 b += 3;
2153
2154 if (!zero_times_ok)
2155 {
2156 /* At least one repetition is required, so insert a
2157 `dummy_failure_jump' before the initial
2158 `on_failure_jump' instruction of the loop. This
2159 effects a skip over that instruction the first time
2160 we hit that loop. */
2161 GET_BUFFER_SPACE (3);
2162 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2163 b += 3;
2164 }
2165 }
2166 break;
2167
2168
2169 case '.':
2170 laststart = b;
2171 BUF_PUSH (anychar);
2172 break;
2173
2174
2175 case '[':
2176 {
2177 boolean had_char_class = false;
ac8295d2 2178 unsigned int range_start = 0xffffffff;
2b83a2a4
RM
2179
2180 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2181
2182 /* Ensure that we have enough space to push a charset: the
2183 opcode, the length count, and the bitset; 34 bytes in all. */
2184 GET_BUFFER_SPACE (34);
2185
2186 laststart = b;
2187
2188 /* We test `*p == '^' twice, instead of using an if
2189 statement, so we only need one BUF_PUSH. */
91c7b85d 2190 BUF_PUSH (*p == '^' ? charset_not : charset);
2b83a2a4
RM
2191 if (*p == '^')
2192 p++;
2193
2194 /* Remember the first position in the bracket expression. */
2195 p1 = p;
2196
2197 /* Push the number of bytes in the bitmap. */
2198 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2199
2200 /* Clear the whole map. */
2201 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2202
2203 /* charset_not matches newline according to a syntax bit. */
2204 if ((re_opcode_t) b[-2] == charset_not
2205 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2206 SET_LIST_BIT ('\n');
2207
2208 /* Read in characters and ranges, setting map bits. */
2209 for (;;)
2210 {
2211 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2212
2213 PATFETCH (c);
2214
2215 /* \ might escape characters inside [...] and [^...]. */
2216 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2217 {
2218 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2219
2220 PATFETCH (c1);
2221 SET_LIST_BIT (c1);
ac8295d2 2222 range_start = c1;
2b83a2a4
RM
2223 continue;
2224 }
2225
2226 /* Could be the end of the bracket expression. If it's
2227 not (i.e., when the bracket expression is `[]' so
2228 far), the ']' character bit gets set way below. */
2229 if (c == ']' && p != p1 + 1)
2230 break;
2231
2232 /* Look ahead to see if it's a range when the last thing
2233 was a character class. */
2234 if (had_char_class && c == '-' && *p != ']')
2235 FREE_STACK_RETURN (REG_ERANGE);
2236
2237 /* Look ahead to see if it's a range when the last thing
2238 was a character: if this is a hyphen not at the
2239 beginning or the end of a list, then it's the range
2240 operator. */
91c7b85d
RM
2241 if (c == '-'
2242 && !(p - 2 >= pattern && p[-2] == '[')
2b83a2a4
RM
2243 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2244 && *p != ']')
2245 {
2246 reg_errcode_t ret
ac8295d2
UD
2247 = compile_range (range_start, &p, pend, translate,
2248 syntax, b);
2b83a2a4 2249 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
ac8295d2 2250 range_start = 0xffffffff;
2b83a2a4
RM
2251 }
2252
2253 else if (p[0] == '-' && p[1] != ']')
2254 { /* This handles ranges made up of characters only. */
2255 reg_errcode_t ret;
2256
2257 /* Move past the `-'. */
2258 PATFETCH (c1);
91c7b85d 2259
ac8295d2 2260 ret = compile_range (c, &p, pend, translate, syntax, b);
2b83a2a4 2261 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
ac8295d2 2262 range_start = 0xffffffff;
2b83a2a4
RM
2263 }
2264
2265 /* See if we're at the beginning of a possible character
2266 class. */
2267
2268 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2269 { /* Leave room for the null. */
2270 char str[CHAR_CLASS_MAX_LENGTH + 1];
2271
2272 PATFETCH (c);
2273 c1 = 0;
2274
2275 /* If pattern is `[[:'. */
2276 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2277
2278 for (;;)
2279 {
2280 PATFETCH (c);
bece5ca7 2281 if ((c == ':' && *p == ']') || p == pend)
2b83a2a4 2282 break;
bece5ca7
UD
2283 if (c1 < CHAR_CLASS_MAX_LENGTH)
2284 str[c1++] = c;
2285 else
2286 /* This is in any case an invalid class name. */
2287 str[0] = '\0';
2b83a2a4
RM
2288 }
2289 str[c1] = '\0';
2290
68b50604 2291 /* If isn't a word bracketed by `[:' and `:]':
91c7b85d 2292 undo the ending character, the letters, and leave
2b83a2a4
RM
2293 the leading `:' and `[' (but set bits for them). */
2294 if (c == ':' && *p == ']')
2295 {
409dfcea 2296#if defined _LIBC || WIDE_CHAR_SUPPORT
51702635
UD
2297 boolean is_lower = STREQ (str, "lower");
2298 boolean is_upper = STREQ (str, "upper");
2299 wctype_t wt;
2300 int ch;
2301
2ad4fab2 2302 wt = IS_CHAR_CLASS (str);
51702635
UD
2303 if (wt == 0)
2304 FREE_STACK_RETURN (REG_ECTYPE);
2305
2306 /* Throw away the ] at the end of the character
2307 class. */
2308 PATFETCH (c);
2309
2310 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2311
2312 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2313 {
2ad4fab2
UD
2314# ifdef _LIBC
2315 if (__iswctype (__btowc (ch), wt))
2316 SET_LIST_BIT (ch);
7ce241a0 2317# else
51702635
UD
2318 if (iswctype (btowc (ch), wt))
2319 SET_LIST_BIT (ch);
7ce241a0 2320# endif
51702635
UD
2321
2322 if (translate && (is_upper || is_lower)
2323 && (ISUPPER (ch) || ISLOWER (ch)))
2324 SET_LIST_BIT (ch);
2325 }
2326
2327 had_char_class = true;
2328#else
2b83a2a4
RM
2329 int ch;
2330 boolean is_alnum = STREQ (str, "alnum");
2331 boolean is_alpha = STREQ (str, "alpha");
2332 boolean is_blank = STREQ (str, "blank");
2333 boolean is_cntrl = STREQ (str, "cntrl");
2334 boolean is_digit = STREQ (str, "digit");
2335 boolean is_graph = STREQ (str, "graph");
2336 boolean is_lower = STREQ (str, "lower");
2337 boolean is_print = STREQ (str, "print");
2338 boolean is_punct = STREQ (str, "punct");
2339 boolean is_space = STREQ (str, "space");
2340 boolean is_upper = STREQ (str, "upper");
2341 boolean is_xdigit = STREQ (str, "xdigit");
91c7b85d 2342
2b83a2a4
RM
2343 if (!IS_CHAR_CLASS (str))
2344 FREE_STACK_RETURN (REG_ECTYPE);
2345
2346 /* Throw away the ] at the end of the character
2347 class. */
91c7b85d 2348 PATFETCH (c);
2b83a2a4
RM
2349
2350 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2351
2352 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2353 {
2354 /* This was split into 3 if's to
2355 avoid an arbitrary limit in some compiler. */
2356 if ( (is_alnum && ISALNUM (ch))
2357 || (is_alpha && ISALPHA (ch))
2358 || (is_blank && ISBLANK (ch))
2359 || (is_cntrl && ISCNTRL (ch)))
2360 SET_LIST_BIT (ch);
2361 if ( (is_digit && ISDIGIT (ch))
2362 || (is_graph && ISGRAPH (ch))
2363 || (is_lower && ISLOWER (ch))
2364 || (is_print && ISPRINT (ch)))
2365 SET_LIST_BIT (ch);
2366 if ( (is_punct && ISPUNCT (ch))
2367 || (is_space && ISSPACE (ch))
2368 || (is_upper && ISUPPER (ch))
2369 || (is_xdigit && ISXDIGIT (ch)))
2370 SET_LIST_BIT (ch);
4cca6b86
UD
2371 if ( translate && (is_upper || is_lower)
2372 && (ISUPPER (ch) || ISLOWER (ch)))
2373 SET_LIST_BIT (ch);
2b83a2a4
RM
2374 }
2375 had_char_class = true;
51702635 2376#endif /* libc || wctype.h */
2b83a2a4
RM
2377 }
2378 else
2379 {
2380 c1++;
91c7b85d 2381 while (c1--)
2b83a2a4
RM
2382 PATUNFETCH;
2383 SET_LIST_BIT ('[');
2384 SET_LIST_BIT (':');
ac8295d2 2385 range_start = ':';
2b83a2a4
RM
2386 had_char_class = false;
2387 }
2388 }
a63a3c2c
UD
2389 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
2390 {
2391 unsigned char str[MB_LEN_MAX + 1];
3216711f 2392#ifdef _LIBC
a63a3c2c
UD
2393 uint32_t nrules =
2394 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3216711f 2395#endif
a63a3c2c
UD
2396
2397 PATFETCH (c);
2398 c1 = 0;
2399
2400 /* If pattern is `[[='. */
2401 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2402
2403 for (;;)
2404 {
2405 PATFETCH (c);
2406 if ((c == '=' && *p == ']') || p == pend)
2407 break;
2408 if (c1 < MB_LEN_MAX)
2409 str[c1++] = c;
2410 else
2411 /* This is in any case an invalid class name. */
2412 str[0] = '\0';
2413 }
2414 str[c1] = '\0';
2415
2416 if (c == '=' && *p == ']' && str[0] != '\0')
2417 {
2418 /* If we have no collation data we use the default
2419 collation in which each character is in a class
2420 by itself. It also means that ASCII is the
2421 character set and therefore we cannot have character
2422 with more than one byte in the multibyte
2423 representation. */
3216711f 2424#ifdef _LIBC
a63a3c2c 2425 if (nrules == 0)
3216711f 2426#endif
a63a3c2c
UD
2427 {
2428 if (c1 != 1)
2429 FREE_STACK_RETURN (REG_ECOLLATE);
2430
2431 /* Throw away the ] at the end of the equivalence
2432 class. */
2433 PATFETCH (c);
2434
2435 /* Set the bit for the character. */
2436 SET_LIST_BIT (str[0]);
2437 }
3216711f 2438#ifdef _LIBC
a63a3c2c
UD
2439 else
2440 {
2441 /* Try to match the byte sequence in `str' against
2442 those known to the collate implementation.
2443 First find out whether the bytes in `str' are
2444 actually from exactly one character. */
2445 const int32_t *table;
2446 const unsigned char *weights;
2447 const unsigned char *extra;
2448 const int32_t *indirect;
2449 int32_t idx;
2450 const unsigned char *cp = str;
2451 int32_t weight;
2452 int ch;
2453
2454 /* This #include defines a local function! */
2455# include <locale/weight.h>
2456
2457 table = (const int32_t *)
2458 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
2459 weights = (const unsigned char *)
2460 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
2461 extra = (const unsigned char *)
2462 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
2463 indirect = (const int32_t *)
2464 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
2465
2466 idx = findidx (&cp);
2467 if (idx == 0 || cp < str + c1)
2468 /* This is no valid character. */
2469 FREE_STACK_RETURN (REG_ECOLLATE);
2470
2471 /* Throw away the ] at the end of the equivalence
2472 class. */
2473 PATFETCH (c);
2474
2475 /* Now we have to go throught the whole table
2476 and find all characters which have the same
2477 first level weight.
2478
2479 XXX Note that this is not entirely correct.
2480 we would have to match multibyte sequences
2481 but this is not possible with the current
2482 implementation. */
2483 for (ch = 1; ch < 256; ++ch)
2484 /* XXX This test would have to be changed if we
2485 would allow matching multibyte sequences. */
2486 if (table[ch] > 0)
2487 {
2488 int32_t idx2 = table[ch];
2489 size_t len = weights[idx2];
2490
2491 /* Test whether the lenghts match. */
2492 if (weights[idx] == len)
2493 {
2494 /* They do. New compare the bytes of
2495 the weight. */
2496 size_t cnt = 0;
2497
2498 while (cnt < len
2499 && (weights[idx + 1 + cnt]
2500 == weights[idx2 + 1 + cnt]))
2501 ++len;
2502
2503 if (cnt == len)
2504 /* They match. Mark the character as
2505 acceptable. */
2506 SET_LIST_BIT (ch);
2507 }
2508 }
2509 }
3216711f 2510#endif
a63a3c2c
UD
2511 had_char_class = true;
2512 }
ac8295d2
UD
2513 else
2514 {
2515 c1++;
2516 while (c1--)
2517 PATUNFETCH;
2518 SET_LIST_BIT ('[');
2519 SET_LIST_BIT ('=');
2520 range_start = '=';
2521 had_char_class = false;
2522 }
3216711f
UD
2523 }
2524 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
2525 {
2526 unsigned char str[128]; /* Should be large enough. */
2527#ifdef _LIBC
2528 uint32_t nrules =
2529 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2530#endif
2531
2532 PATFETCH (c);
2533 c1 = 0;
2534
2535 /* If pattern is `[[='. */
2536 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2537
2538 for (;;)
2539 {
2540 PATFETCH (c);
2541 if ((c == '.' && *p == ']') || p == pend)
2542 break;
2543 if (c1 < sizeof (str))
2544 str[c1++] = c;
2545 else
2546 /* This is in any case an invalid class name. */
2547 str[0] = '\0';
2548 }
2549 str[c1] = '\0';
2550
2551 if (c == '.' && *p == ']' && str[0] != '\0')
2552 {
2553 /* If we have no collation data we use the default
2554 collation in which each character is the name
2555 for its own class which contains only the one
2556 character. It also means that ASCII is the
2557 character set and therefore we cannot have character
2558 with more than one byte in the multibyte
2559 representation. */
2560#ifdef _LIBC
2561 if (nrules == 0)
2562#endif
2563 {
2564 if (c1 != 1)
2565 FREE_STACK_RETURN (REG_ECOLLATE);
2566
2567 /* Throw away the ] at the end of the equivalence
2568 class. */
2569 PATFETCH (c);
2570
2571 /* Set the bit for the character. */
2572 SET_LIST_BIT (str[0]);
ac8295d2 2573 range_start = ((const unsigned char *) str)[0];
3216711f
UD
2574 }
2575#ifdef _LIBC
2576 else
2577 {
2578 /* Try to match the byte sequence in `str' against
2579 those known to the collate implementation.
2580 First find out whether the bytes in `str' are
2581 actually from exactly one character. */
3216711f 2582 int32_t table_size;
3216711f
UD
2583 const int32_t *symb_table;
2584 const unsigned char *extra;
2585 int32_t idx;
2586 int32_t elem;
2587 const unsigned char *cp = str;
2588 int32_t weight;
2589 int32_t second;
2590 int32_t hash;
2591 int ch;
2592
3216711f
UD
2593 table_size =
2594 _NL_CURRENT_WORD (LC_COLLATE,
2595 _NL_COLLATE_SYMB_HASH_SIZEMB);
2596 symb_table = (const int32_t *)
2597 _NL_CURRENT (LC_COLLATE,
2598 _NL_COLLATE_SYMB_TABLEMB);
2599 extra = (const unsigned char *)
2600 _NL_CURRENT (LC_COLLATE,
2601 _NL_COLLATE_SYMB_EXTRAMB);
2602
2603 /* Locate the character in the hashing table. */
2604 hash = elem_hash (str, c1);
2605
2606 idx = 0;
2607 elem = hash % table_size;
2608 second = hash % (table_size - 2);
2609 while (symb_table[2 * elem] != 0)
2610 {
2611 /* First compare the hashing value. */
2612 if (symb_table[2 * elem] == hash
ac8295d2 2613 && c1 == extra[symb_table[2 * elem + 1]]
3216711f
UD
2614 && memcmp (str,
2615 &extra[symb_table[2 * elem + 1]
ac8295d2 2616 + 1],
3216711f
UD
2617 c1) == 0)
2618 {
2619 /* Yep, this is the entry. */
ac8295d2
UD
2620 idx = symb_table[2 * elem + 1];
2621 idx += 1 + extra[idx];
3216711f
UD
2622 break;
2623 }
2624
2625 /* Next entry. */
2626 elem += second;
2627 }
2628
2629 if (symb_table[2 * elem] == 0)
2630 /* This is no valid character. */
2631 FREE_STACK_RETURN (REG_ECOLLATE);
2632
2633 /* Throw away the ] at the end of the equivalence
2634 class. */
2635 PATFETCH (c);
2636
ac8295d2
UD
2637 /* Now add the multibyte character(s) we found
2638 to the acceptabed list.
3216711f
UD
2639
2640 XXX Note that this is not entirely correct.
2641 we would have to match multibyte sequences
2642 but this is not possible with the current
ac8295d2
UD
2643 implementation. Also, we have to match
2644 collating symbols, which expand to more than
2645 one file, as a whole and not allow the
2646 individual bytes. */
2647 c1 = extra[idx++];
2648 if (c1 == 1)
2649 range_start = extra[idx];
2650 while (c1-- > 0)
2651 SET_LIST_BIT (extra[idx++]);
3216711f
UD
2652 }
2653#endif
2654 had_char_class = false;
2655 }
a63a3c2c
UD
2656 else
2657 {
2658 c1++;
2659 while (c1--)
2660 PATUNFETCH;
2661 SET_LIST_BIT ('[');
ac8295d2
UD
2662 SET_LIST_BIT ('.');
2663 range_start = '.';
a63a3c2c
UD
2664 had_char_class = false;
2665 }
2666 }
2b83a2a4
RM
2667 else
2668 {
2669 had_char_class = false;
2670 SET_LIST_BIT (c);
ac8295d2 2671 range_start = c;
2b83a2a4
RM
2672 }
2673 }
2674
2675 /* Discard any (non)matching list bytes that are all 0 at the
2676 end of the map. Decrease the map-length byte too. */
91c7b85d
RM
2677 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2678 b[-1]--;
2b83a2a4
RM
2679 b += b[-1];
2680 }
2681 break;
2682
2683
2684 case '(':
2685 if (syntax & RE_NO_BK_PARENS)
2686 goto handle_open;
2687 else
2688 goto normal_char;
2689
2690
2691 case ')':
2692 if (syntax & RE_NO_BK_PARENS)
2693 goto handle_close;
2694 else
2695 goto normal_char;
2696
2697
2698 case '\n':
2699 if (syntax & RE_NEWLINE_ALT)
2700 goto handle_alt;
2701 else
2702 goto normal_char;
2703
2704
2705 case '|':
2706 if (syntax & RE_NO_BK_VBAR)
2707 goto handle_alt;
2708 else
2709 goto normal_char;
2710
2711
2712 case '{':
2713 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2714 goto handle_interval;
2715 else
2716 goto normal_char;
2717
2718
2719 case '\\':
2720 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2721
2722 /* Do not translate the character after the \, so that we can
2723 distinguish, e.g., \B from \b, even if we normally would
2724 translate, e.g., B to b. */
2725 PATFETCH_RAW (c);
2726
2727 switch (c)
2728 {
2729 case '(':
2730 if (syntax & RE_NO_BK_PARENS)
2731 goto normal_backslash;
2732
2733 handle_open:
2734 bufp->re_nsub++;
2735 regnum++;
2736
2737 if (COMPILE_STACK_FULL)
91c7b85d 2738 {
2b83a2a4
RM
2739 RETALLOC (compile_stack.stack, compile_stack.size << 1,
2740 compile_stack_elt_t);
2741 if (compile_stack.stack == NULL) return REG_ESPACE;
2742
2743 compile_stack.size <<= 1;
2744 }
2745
2746 /* These are the values to restore when we hit end of this
2747 group. They are all relative offsets, so that if the
2748 whole pattern moves because of realloc, they will still
2749 be valid. */
2750 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
91c7b85d 2751 COMPILE_STACK_TOP.fixup_alt_jump
2b83a2a4
RM
2752 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2753 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2754 COMPILE_STACK_TOP.regnum = regnum;
2755
2756 /* We will eventually replace the 0 with the number of
2757 groups inner to this one. But do not push a
2758 start_memory for groups beyond the last one we can
2759 represent in the compiled pattern. */
2760 if (regnum <= MAX_REGNUM)
2761 {
2762 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2763 BUF_PUSH_3 (start_memory, regnum, 0);
2764 }
91c7b85d 2765
2b83a2a4
RM
2766 compile_stack.avail++;
2767
2768 fixup_alt_jump = 0;
2769 laststart = 0;
2770 begalt = b;
2771 /* If we've reached MAX_REGNUM groups, then this open
2772 won't actually generate any code, so we'll have to
2773 clear pending_exact explicitly. */
2774 pending_exact = 0;
2775 break;
2776
2777
2778 case ')':
2779 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2780
2781 if (COMPILE_STACK_EMPTY)
07b51ba5
UD
2782 {
2783 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2784 goto normal_backslash;
2785 else
2786 FREE_STACK_RETURN (REG_ERPAREN);
2787 }
2b83a2a4
RM
2788
2789 handle_close:
2790 if (fixup_alt_jump)
2791 { /* Push a dummy failure point at the end of the
2792 alternative for a possible future
2793 `pop_failure_jump' to pop. See comments at
2794 `push_dummy_failure' in `re_match_2'. */
2795 BUF_PUSH (push_dummy_failure);
91c7b85d 2796
2b83a2a4
RM
2797 /* We allocated space for this jump when we assigned
2798 to `fixup_alt_jump', in the `handle_alt' case below. */
2799 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2800 }
2801
2802 /* See similar code for backslashed left paren above. */
2803 if (COMPILE_STACK_EMPTY)
07b51ba5
UD
2804 {
2805 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2806 goto normal_char;
2807 else
2808 FREE_STACK_RETURN (REG_ERPAREN);
2809 }
2b83a2a4
RM
2810
2811 /* Since we just checked for an empty stack above, this
2812 ``can't happen''. */
2813 assert (compile_stack.avail != 0);
2814 {
2815 /* We don't just want to restore into `regnum', because
2816 later groups should continue to be numbered higher,
2817 as in `(ab)c(de)' -- the second group is #2. */
2818 regnum_t this_group_regnum;
2819
91c7b85d 2820 compile_stack.avail--;
2b83a2a4
RM
2821 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2822 fixup_alt_jump
2823 = COMPILE_STACK_TOP.fixup_alt_jump
91c7b85d 2824 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2b83a2a4
RM
2825 : 0;
2826 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2827 this_group_regnum = COMPILE_STACK_TOP.regnum;
2828 /* If we've reached MAX_REGNUM groups, then this open
2829 won't actually generate any code, so we'll have to
2830 clear pending_exact explicitly. */
2831 pending_exact = 0;
2832
2833 /* We're at the end of the group, so now we know how many
2834 groups were inside this one. */
2835 if (this_group_regnum <= MAX_REGNUM)
2836 {
2837 unsigned char *inner_group_loc
2838 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
91c7b85d 2839
2b83a2a4
RM
2840 *inner_group_loc = regnum - this_group_regnum;
2841 BUF_PUSH_3 (stop_memory, this_group_regnum,
2842 regnum - this_group_regnum);
2843 }
2844 }
2845 break;
2846
2847
2848 case '|': /* `\|'. */
2849 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2850 goto normal_backslash;
2851 handle_alt:
2852 if (syntax & RE_LIMITED_OPS)
2853 goto normal_char;
2854
2855 /* Insert before the previous alternative a jump which
2856 jumps to this alternative if the former fails. */
2857 GET_BUFFER_SPACE (3);
2858 INSERT_JUMP (on_failure_jump, begalt, b + 6);
2859 pending_exact = 0;
2860 b += 3;
2861
2862 /* The alternative before this one has a jump after it
2863 which gets executed if it gets matched. Adjust that
2864 jump so it will jump to this alternative's analogous
2865 jump (put in below, which in turn will jump to the next
2866 (if any) alternative's such jump, etc.). The last such
2867 jump jumps to the correct final destination. A picture:
91c7b85d
RM
2868 _____ _____
2869 | | | |
2870 | v | v
2871 a | b | c
2b83a2a4
RM
2872
2873 If we are at `b', then fixup_alt_jump right now points to a
2874 three-byte space after `a'. We'll put in the jump, set
2875 fixup_alt_jump to right after `b', and leave behind three
2876 bytes which we'll fill in when we get to after `c'. */
2877
2878 if (fixup_alt_jump)
2879 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2880
2881 /* Mark and leave space for a jump after this alternative,
2882 to be filled in later either by next alternative or
2883 when know we're at the end of a series of alternatives. */
2884 fixup_alt_jump = b;
2885 GET_BUFFER_SPACE (3);
2886 b += 3;
2887
2888 laststart = 0;
2889 begalt = b;
2890 break;
2891
2892
91c7b85d 2893 case '{':
2b83a2a4
RM
2894 /* If \{ is a literal. */
2895 if (!(syntax & RE_INTERVALS)
91c7b85d 2896 /* If we're at `\{' and it's not the open-interval
2b83a2a4
RM
2897 operator. */
2898 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2899 || (p - 2 == pattern && p == pend))
2900 goto normal_backslash;
2901
2902 handle_interval:
2903 {
2904 /* If got here, then the syntax allows intervals. */
2905
2906 /* At least (most) this many matches must be made. */
2907 int lower_bound = -1, upper_bound = -1;
2908
2909 beg_interval = p - 1;
2910
2911 if (p == pend)
2912 {
2913 if (syntax & RE_NO_BK_BRACES)
2914 goto unfetch_interval;
2915 else
2916 FREE_STACK_RETURN (REG_EBRACE);
2917 }
2918
2919 GET_UNSIGNED_NUMBER (lower_bound);
2920
2921 if (c == ',')
2922 {
2923 GET_UNSIGNED_NUMBER (upper_bound);
2924 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2925 }
2926 else
2927 /* Interval such as `{1}' => match exactly once. */
2928 upper_bound = lower_bound;
2929
2930 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2931 || lower_bound > upper_bound)
2932 {
2933 if (syntax & RE_NO_BK_BRACES)
2934 goto unfetch_interval;
91c7b85d 2935 else
2b83a2a4
RM
2936 FREE_STACK_RETURN (REG_BADBR);
2937 }
2938
91c7b85d 2939 if (!(syntax & RE_NO_BK_BRACES))
2b83a2a4
RM
2940 {
2941 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2942
2943 PATFETCH (c);
2944 }
2945
2946 if (c != '}')
2947 {
2948 if (syntax & RE_NO_BK_BRACES)
2949 goto unfetch_interval;
91c7b85d 2950 else
2b83a2a4
RM
2951 FREE_STACK_RETURN (REG_BADBR);
2952 }
2953
2954 /* We just parsed a valid interval. */
2955
2956 /* If it's invalid to have no preceding re. */
2957 if (!laststart)
2958 {
2959 if (syntax & RE_CONTEXT_INVALID_OPS)
2960 FREE_STACK_RETURN (REG_BADRPT);
2961 else if (syntax & RE_CONTEXT_INDEP_OPS)
2962 laststart = b;
2963 else
2964 goto unfetch_interval;
2965 }
2966
2967 /* If the upper bound is zero, don't want to succeed at
2968 all; jump from `laststart' to `b + 3', which will be
2969 the end of the buffer after we insert the jump. */
2970 if (upper_bound == 0)
2971 {
2972 GET_BUFFER_SPACE (3);
2973 INSERT_JUMP (jump, laststart, b + 3);
2974 b += 3;
2975 }
2976
2977 /* Otherwise, we have a nontrivial interval. When
2978 we're all done, the pattern will look like:
2979 set_number_at <jump count> <upper bound>
2980 set_number_at <succeed_n count> <lower bound>
2981 succeed_n <after jump addr> <succeed_n count>
2982 <body of loop>
2983 jump_n <succeed_n addr> <jump count>
2984 (The upper bound and `jump_n' are omitted if
2985 `upper_bound' is 1, though.) */
91c7b85d 2986 else
2b83a2a4
RM
2987 { /* If the upper bound is > 1, we need to insert
2988 more at the end of the loop. */
2989 unsigned nbytes = 10 + (upper_bound > 1) * 10;
2990
2991 GET_BUFFER_SPACE (nbytes);
2992
2993 /* Initialize lower bound of the `succeed_n', even
2994 though it will be set during matching by its
2995 attendant `set_number_at' (inserted next),
2996 because `re_compile_fastmap' needs to know.
2997 Jump to the `jump_n' we might insert below. */
2998 INSERT_JUMP2 (succeed_n, laststart,
2999 b + 5 + (upper_bound > 1) * 5,
3000 lower_bound);
3001 b += 5;
3002
91c7b85d 3003 /* Code to initialize the lower bound. Insert
2b83a2a4
RM
3004 before the `succeed_n'. The `5' is the last two
3005 bytes of this `set_number_at', plus 3 bytes of
3006 the following `succeed_n'. */
3007 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
3008 b += 5;
3009
3010 if (upper_bound > 1)
3011 { /* More than one repetition is allowed, so
3012 append a backward jump to the `succeed_n'
3013 that starts this interval.
91c7b85d 3014
2b83a2a4
RM
3015 When we've reached this during matching,
3016 we'll have matched the interval once, so
3017 jump back only `upper_bound - 1' times. */
3018 STORE_JUMP2 (jump_n, b, laststart + 5,
3019 upper_bound - 1);
3020 b += 5;
3021
3022 /* The location we want to set is the second
3023 parameter of the `jump_n'; that is `b-2' as
3024 an absolute address. `laststart' will be
3025 the `set_number_at' we're about to insert;
3026 `laststart+3' the number to set, the source
3027 for the relative address. But we are
3028 inserting into the middle of the pattern --
3029 so everything is getting moved up by 5.
3030 Conclusion: (b - 2) - (laststart + 3) + 5,
3031 i.e., b - laststart.
91c7b85d 3032
2b83a2a4
RM
3033 We insert this at the beginning of the loop
3034 so that if we fail during matching, we'll
3035 reinitialize the bounds. */
3036 insert_op2 (set_number_at, laststart, b - laststart,
3037 upper_bound - 1, b);
3038 b += 5;
3039 }
3040 }
3041 pending_exact = 0;
3042 beg_interval = NULL;
3043 }
3044 break;
3045
3046 unfetch_interval:
3047 /* If an invalid interval, match the characters as literals. */
3048 assert (beg_interval);
3049 p = beg_interval;
3050 beg_interval = NULL;
3051
3052 /* normal_char and normal_backslash need `c'. */
91c7b85d 3053 PATFETCH (c);
2b83a2a4
RM
3054
3055 if (!(syntax & RE_NO_BK_BRACES))
3056 {
3057 if (p > pattern && p[-1] == '\\')
3058 goto normal_backslash;
3059 }
3060 goto normal_char;
3061
3062#ifdef emacs
3063 /* There is no way to specify the before_dot and after_dot
3064 operators. rms says this is ok. --karl */
3065 case '=':
3066 BUF_PUSH (at_dot);
3067 break;
3068
91c7b85d 3069 case 's':
2b83a2a4
RM
3070 laststart = b;
3071 PATFETCH (c);
3072 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3073 break;
3074
3075 case 'S':
3076 laststart = b;
3077 PATFETCH (c);
3078 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
3079 break;
3080#endif /* emacs */
3081
3082
3083 case 'w':
310b3460 3084 if (syntax & RE_NO_GNU_OPS)
4cca6b86 3085 goto normal_char;
2b83a2a4
RM
3086 laststart = b;
3087 BUF_PUSH (wordchar);
3088 break;
3089
3090
3091 case 'W':
310b3460 3092 if (syntax & RE_NO_GNU_OPS)
4cca6b86 3093 goto normal_char;
2b83a2a4
RM
3094 laststart = b;
3095 BUF_PUSH (notwordchar);
3096 break;
3097
3098
3099 case '<':
310b3460 3100 if (syntax & RE_NO_GNU_OPS)
4cca6b86 3101 goto normal_char;
2b83a2a4
RM
3102 BUF_PUSH (wordbeg);
3103 break;
3104
3105 case '>':
310b3460 3106 if (syntax & RE_NO_GNU_OPS)
4cca6b86 3107 goto normal_char;
2b83a2a4
RM
3108 BUF_PUSH (wordend);
3109 break;
3110
3111 case 'b':
310b3460 3112 if (syntax & RE_NO_GNU_OPS)
4cca6b86 3113 goto normal_char;
2b83a2a4
RM
3114 BUF_PUSH (wordbound);
3115 break;
3116
3117 case 'B':
310b3460 3118 if (syntax & RE_NO_GNU_OPS)
4cca6b86 3119 goto normal_char;
2b83a2a4
RM
3120 BUF_PUSH (notwordbound);
3121 break;
3122
3123 case '`':
310b3460 3124 if (syntax & RE_NO_GNU_OPS)
4cca6b86 3125 goto normal_char;
2b83a2a4
RM
3126 BUF_PUSH (begbuf);
3127 break;
3128
3129 case '\'':
310b3460 3130 if (syntax & RE_NO_GNU_OPS)
4cca6b86 3131 goto normal_char;
2b83a2a4
RM
3132 BUF_PUSH (endbuf);
3133 break;
3134
3135 case '1': case '2': case '3': case '4': case '5':
3136 case '6': case '7': case '8': case '9':
3137 if (syntax & RE_NO_BK_REFS)
3138 goto normal_char;
3139
3140 c1 = c - '0';
3141
3142 if (c1 > regnum)
3143 FREE_STACK_RETURN (REG_ESUBREG);
3144
3145 /* Can't back reference to a subexpression if inside of it. */
4cca6b86 3146 if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2b83a2a4
RM
3147 goto normal_char;
3148
3149 laststart = b;
3150 BUF_PUSH_2 (duplicate, c1);
3151 break;
3152
3153
3154 case '+':
3155 case '?':
3156 if (syntax & RE_BK_PLUS_QM)
3157 goto handle_plus;
3158 else
3159 goto normal_backslash;
3160
3161 default:
3162 normal_backslash:
3163 /* You might think it would be useful for \ to mean
3164 not to translate; but if we don't translate it
3165 it will never match anything. */
3166 c = TRANSLATE (c);
3167 goto normal_char;
3168 }
3169 break;
3170
3171
3172 default:
3173 /* Expects the character in `c'. */
3174 normal_char:
3175 /* If no exactn currently being built. */
91c7b85d 3176 if (!pending_exact
2b83a2a4
RM
3177
3178 /* If last exactn not at current position. */
3179 || pending_exact + *pending_exact + 1 != b
91c7b85d 3180
2b83a2a4
RM
3181 /* We have only one byte following the exactn for the count. */
3182 || *pending_exact == (1 << BYTEWIDTH) - 1
3183
3184 /* If followed by a repetition operator. */
3185 || *p == '*' || *p == '^'
3186 || ((syntax & RE_BK_PLUS_QM)
3187 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
3188 : (*p == '+' || *p == '?'))
3189 || ((syntax & RE_INTERVALS)
3190 && ((syntax & RE_NO_BK_BRACES)
3191 ? *p == '{'
3192 : (p[0] == '\\' && p[1] == '{'))))
3193 {
3194 /* Start building a new exactn. */
91c7b85d 3195
2b83a2a4
RM
3196 laststart = b;
3197
3198 BUF_PUSH_2 (exactn, 0);
3199 pending_exact = b - 1;
3200 }
91c7b85d 3201
2b83a2a4
RM
3202 BUF_PUSH (c);
3203 (*pending_exact)++;
3204 break;
3205 } /* switch (c) */
3206 } /* while p != pend */
3207
91c7b85d 3208
2b83a2a4 3209 /* Through the pattern now. */
91c7b85d 3210
2b83a2a4
RM
3211 if (fixup_alt_jump)
3212 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3213
91c7b85d 3214 if (!COMPILE_STACK_EMPTY)
2b83a2a4
RM
3215 FREE_STACK_RETURN (REG_EPAREN);
3216
3217 /* If we don't want backtracking, force success
3218 the first time we reach the end of the compiled pattern. */
3219 if (syntax & RE_NO_POSIX_BACKTRACKING)
3220 BUF_PUSH (succeed);
3221
3222 free (compile_stack.stack);
3223
3224 /* We have succeeded; set the length of the buffer. */
3225 bufp->used = b - bufp->buffer;
3226
3227#ifdef DEBUG
3228 if (debug)
3229 {
3230 DEBUG_PRINT1 ("\nCompiled pattern: \n");
3231 print_compiled_pattern (bufp);
3232 }
3233#endif /* DEBUG */
3234
3235#ifndef MATCH_MAY_ALLOCATE
3236 /* Initialize the failure stack to the largest possible stack. This
3237 isn't necessary unless we're trying to avoid calling alloca in
3238 the search and match routines. */
3239 {
3240 int num_regs = bufp->re_nsub + 1;
3241
3242 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
3243 is strictly greater than re_max_failures, the largest possible stack
3244 is 2 * re_max_failures failure points. */
3245 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
3246 {
3247 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
3248
86187531 3249# ifdef emacs
2b83a2a4
RM
3250 if (! fail_stack.stack)
3251 fail_stack.stack
91c7b85d 3252 = (fail_stack_elt_t *) xmalloc (fail_stack.size
2b83a2a4
RM
3253 * sizeof (fail_stack_elt_t));
3254 else
3255 fail_stack.stack
3256 = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
3257 (fail_stack.size
3258 * sizeof (fail_stack_elt_t)));
86187531 3259# else /* not emacs */
2b83a2a4
RM
3260 if (! fail_stack.stack)
3261 fail_stack.stack
91c7b85d 3262 = (fail_stack_elt_t *) malloc (fail_stack.size
2b83a2a4
RM
3263 * sizeof (fail_stack_elt_t));
3264 else
3265 fail_stack.stack
3266 = (fail_stack_elt_t *) realloc (fail_stack.stack,
3267 (fail_stack.size
3268 * sizeof (fail_stack_elt_t)));
86187531 3269# endif /* not emacs */
2b83a2a4
RM
3270 }
3271
3272 regex_grow_registers (num_regs);
3273 }
3274#endif /* not MATCH_MAY_ALLOCATE */
3275
3276 return REG_NOERROR;
3277} /* regex_compile */
3278\f
3279/* Subroutines for `regex_compile'. */
3280
3281/* Store OP at LOC followed by two-byte integer parameter ARG. */
3282
3283static void
3284store_op1 (op, loc, arg)
3285 re_opcode_t op;
3286 unsigned char *loc;
3287 int arg;
3288{
3289 *loc = (unsigned char) op;
3290 STORE_NUMBER (loc + 1, arg);
3291}
3292
3293
3294/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
3295
3296static void
3297store_op2 (op, loc, arg1, arg2)
3298 re_opcode_t op;
3299 unsigned char *loc;
3300 int arg1, arg2;
3301{
3302 *loc = (unsigned char) op;
3303 STORE_NUMBER (loc + 1, arg1);
3304 STORE_NUMBER (loc + 3, arg2);
3305}
3306
3307
3308/* Copy the bytes from LOC to END to open up three bytes of space at LOC
3309 for OP followed by two-byte integer parameter ARG. */
3310
3311static void
3312insert_op1 (op, loc, arg, end)
3313 re_opcode_t op;
3314 unsigned char *loc;
3315 int arg;
91c7b85d 3316 unsigned char *end;
2b83a2a4
RM
3317{
3318 register unsigned char *pfrom = end;
3319 register unsigned char *pto = end + 3;
3320
3321 while (pfrom != loc)
3322 *--pto = *--pfrom;
91c7b85d 3323
2b83a2a4
RM
3324 store_op1 (op, loc, arg);
3325}
3326
3327
3328/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
3329
3330static void
3331insert_op2 (op, loc, arg1, arg2, end)
3332 re_opcode_t op;
3333 unsigned char *loc;
3334 int arg1, arg2;
91c7b85d 3335 unsigned char *end;
2b83a2a4
RM
3336{
3337 register unsigned char *pfrom = end;
3338 register unsigned char *pto = end + 5;
3339
3340 while (pfrom != loc)
3341 *--pto = *--pfrom;
91c7b85d 3342
2b83a2a4
RM
3343 store_op2 (op, loc, arg1, arg2);
3344}
3345
3346
3347/* P points to just after a ^ in PATTERN. Return true if that ^ comes
3348 after an alternative or a begin-subexpression. We assume there is at
3349 least one character before the ^. */
3350
3351static boolean
3352at_begline_loc_p (pattern, p, syntax)
3353 const char *pattern, *p;
3354 reg_syntax_t syntax;
3355{
3356 const char *prev = p - 2;
3357 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
91c7b85d 3358
2b83a2a4
RM
3359 return
3360 /* After a subexpression? */
3361 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3362 /* After an alternative? */
3363 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3364}
3365
3366
3367/* The dual of at_begline_loc_p. This one is for $. We assume there is
3368 at least one character after the $, i.e., `P < PEND'. */
3369
3370static boolean
3371at_endline_loc_p (p, pend, syntax)
3372 const char *p, *pend;
4cca6b86 3373 reg_syntax_t syntax;
2b83a2a4
RM
3374{
3375 const char *next = p;
3376 boolean next_backslash = *next == '\\';
5bf62f2d 3377 const char *next_next = p + 1 < pend ? p + 1 : 0;
91c7b85d 3378
2b83a2a4
RM
3379 return
3380 /* Before a subexpression? */
3381 (syntax & RE_NO_BK_PARENS ? *next == ')'
3382 : next_backslash && next_next && *next_next == ')')
3383 /* Before an alternative? */
3384 || (syntax & RE_NO_BK_VBAR ? *next == '|'
3385 : next_backslash && next_next && *next_next == '|');
3386}
3387
3388
91c7b85d 3389/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2b83a2a4
RM
3390 false if it's not. */
3391
3392static boolean
3393group_in_compile_stack (compile_stack, regnum)
3394 compile_stack_type compile_stack;
3395 regnum_t regnum;
3396{
3397 int this_element;
3398
91c7b85d
RM
3399 for (this_element = compile_stack.avail - 1;
3400 this_element >= 0;
2b83a2a4
RM
3401 this_element--)
3402 if (compile_stack.stack[this_element].regnum == regnum)
3403 return true;
3404
3405 return false;
3406}
3407
3408
3409/* Read the ending character of a range (in a bracket expression) from the
3410 uncompiled pattern *P_PTR (which ends at PEND). We assume the
3411 starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
3412 Then we set the translation of all bits between the starting and
3413 ending characters (inclusive) in the compiled pattern B.
91c7b85d 3414
2b83a2a4 3415 Return an error code.
91c7b85d 3416
2b83a2a4
RM
3417 We use these short variable names so we can use the same macros as
3418 `regex_compile' itself. */
3419
3420static reg_errcode_t
ac8295d2
UD
3421compile_range (range_start, p_ptr, pend, translate, syntax, b)
3422 unsigned int range_start;
2b83a2a4 3423 const char **p_ptr, *pend;
03a75825 3424 RE_TRANSLATE_TYPE translate;
2b83a2a4
RM
3425 reg_syntax_t syntax;
3426 unsigned char *b;
3427{
3428 unsigned this_char;
3429
3430 const char *p = *p_ptr;
ac8295d2 3431 unsigned int range_end;
91c7b85d 3432
2b83a2a4
RM
3433 if (p == pend)
3434 return REG_ERANGE;
3435
3436 /* Even though the pattern is a signed `char *', we need to fetch
3437 with unsigned char *'s; if the high bit of the pattern character
3438 is set, the range endpoints will be negative if we fetch using a
3439 signed char *.
3440
91c7b85d 3441 We also want to fetch the endpoints without translating them; the
2b83a2a4
RM
3442 appropriate translation is done in the bit-setting loop below. */
3443 /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
2b83a2a4
RM
3444 range_end = ((const unsigned char *) p)[0];
3445
3446 /* Have to increment the pointer into the pattern string, so the
3447 caller isn't still at the ending character. */
3448 (*p_ptr)++;
3449
3450 /* If the start is after the end, the range is empty. */
3451 if (range_start > range_end)
3452 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3453
3454 /* Here we see why `this_char' has to be larger than an `unsigned
3455 char' -- the range is inclusive, so if `range_end' == 0xff
3456 (assuming 8-bit characters), we would otherwise go into an infinite
3457 loop, since all characters <= 0xff. */
3458 for (this_char = range_start; this_char <= range_end; this_char++)
3459 {
3460 SET_LIST_BIT (TRANSLATE (this_char));
3461 }
91c7b85d 3462
2b83a2a4
RM
3463 return REG_NOERROR;
3464}
3465\f
3466/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3467 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
3468 characters can start a string that matches the pattern. This fastmap
3469 is used by re_search to skip quickly over impossible starting points.
3470
3471 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3472 area as BUFP->fastmap.
91c7b85d 3473
2b83a2a4
RM
3474 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3475 the pattern buffer.
3476
3477 Returns 0 if we succeed, -2 if an internal error. */
3478
3479int
3480re_compile_fastmap (bufp)
3481 struct re_pattern_buffer *bufp;
3482{
3483 int j, k;
3484#ifdef MATCH_MAY_ALLOCATE
3485 fail_stack_type fail_stack;
3486#endif
3487#ifndef REGEX_MALLOC
3488 char *destination;
3489#endif
91c7b85d 3490
2b83a2a4
RM
3491 register char *fastmap = bufp->fastmap;
3492 unsigned char *pattern = bufp->buffer;
2b83a2a4 3493 unsigned char *p = pattern;
4cca6b86 3494 register unsigned char *pend = pattern + bufp->used;
2b83a2a4 3495
4cca6b86 3496#ifdef REL_ALLOC
2b83a2a4
RM
3497 /* This holds the pointer to the failure stack, when
3498 it is allocated relocatably. */
3499 fail_stack_elt_t *failure_stack_ptr;
4cca6b86 3500#endif
2b83a2a4
RM
3501
3502 /* Assume that each path through the pattern can be null until
3503 proven otherwise. We set this false at the bottom of switch
3504 statement, to which we get only if a particular path doesn't
3505 match the empty string. */
3506 boolean path_can_be_null = true;
3507
3508 /* We aren't doing a `succeed_n' to begin with. */
3509 boolean succeed_n_p = false;
3510
3511 assert (fastmap != NULL && p != NULL);
91c7b85d 3512
2b83a2a4
RM
3513 INIT_FAIL_STACK ();
3514 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
3515 bufp->fastmap_accurate = 1; /* It will be when we're done. */
3516 bufp->can_be_null = 0;
91c7b85d 3517
2b83a2a4
RM
3518 while (1)
3519 {
3520 if (p == pend || *p == succeed)
3521 {
3522 /* We have reached the (effective) end of pattern. */
3523 if (!FAIL_STACK_EMPTY ())
3524 {
3525 bufp->can_be_null |= path_can_be_null;
3526
3527 /* Reset for next path. */
3528 path_can_be_null = true;
3529
3530 p = fail_stack.stack[--fail_stack.avail].pointer;
3531
3532 continue;
3533 }
3534 else
3535 break;
3536 }
3537
3538 /* We should never be about to go beyond the end of the pattern. */
3539 assert (p < pend);
91c7b85d 3540
2b83a2a4
RM
3541 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3542 {
3543
3544 /* I guess the idea here is to simply not bother with a fastmap
3545 if a backreference is used, since it's too hard to figure out
3546 the fastmap for the corresponding group. Setting
3547 `can_be_null' stops `re_search_2' from using the fastmap, so
3548 that is all we do. */
3549 case duplicate:
3550 bufp->can_be_null = 1;
3551 goto done;
3552
3553
3554 /* Following are the cases which match a character. These end
3555 with `break'. */
3556
3557 case exactn:
3558 fastmap[p[1]] = 1;
3559 break;
3560
3561
3562 case charset:
3563 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3564 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3565 fastmap[j] = 1;
3566 break;
3567
3568
3569 case charset_not:
3570 /* Chars beyond end of map must be allowed. */
3571 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3572 fastmap[j] = 1;
3573
3574 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3575 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3576 fastmap[j] = 1;
3577 break;
3578
3579
3580 case wordchar:
3581 for (j = 0; j < (1 << BYTEWIDTH); j++)
3582 if (SYNTAX (j) == Sword)
3583 fastmap[j] = 1;
3584 break;
3585
3586
3587 case notwordchar:
3588 for (j = 0; j < (1 << BYTEWIDTH); j++)
3589 if (SYNTAX (j) != Sword)
3590 fastmap[j] = 1;
3591 break;
3592
3593
3594 case anychar:
3595 {
3596 int fastmap_newline = fastmap['\n'];
3597
3598 /* `.' matches anything ... */
3599 for (j = 0; j < (1 << BYTEWIDTH); j++)
3600 fastmap[j] = 1;
3601
3602 /* ... except perhaps newline. */
3603 if (!(bufp->syntax & RE_DOT_NEWLINE))
3604 fastmap['\n'] = fastmap_newline;
3605
3606 /* Return if we have already set `can_be_null'; if we have,
3607 then the fastmap is irrelevant. Something's wrong here. */
3608 else if (bufp->can_be_null)
3609 goto done;
3610
3611 /* Otherwise, have to check alternative paths. */
3612 break;
3613 }
3614
3615#ifdef emacs
3616 case syntaxspec:
3617 k = *p++;
3618 for (j = 0; j < (1 << BYTEWIDTH); j++)
3619 if (SYNTAX (j) == (enum syntaxcode) k)
3620 fastmap[j] = 1;
3621 break;
3622
3623
3624 case notsyntaxspec:
3625 k = *p++;
3626 for (j = 0; j < (1 << BYTEWIDTH); j++)
3627 if (SYNTAX (j) != (enum syntaxcode) k)
3628 fastmap[j] = 1;
3629 break;
3630
3631
3632 /* All cases after this match the empty string. These end with
3633 `continue'. */
3634
3635
3636 case before_dot:
3637 case at_dot:
3638 case after_dot:
3639 continue;
44c8d1a2 3640#endif /* emacs */
2b83a2a4
RM
3641
3642
3643 case no_op:
3644 case begline:
3645 case endline:
3646 case begbuf:
3647 case endbuf:
3648 case wordbound:
3649 case notwordbound:
3650 case wordbeg:
3651 case wordend:
3652 case push_dummy_failure:
3653 continue;
3654
3655
3656 case jump_n:
3657 case pop_failure_jump:
3658 case maybe_pop_jump:
3659 case jump:
3660 case jump_past_alt:
3661 case dummy_failure_jump:
3662 EXTRACT_NUMBER_AND_INCR (j, p);
91c7b85d 3663 p += j;
2b83a2a4
RM
3664 if (j > 0)
3665 continue;
91c7b85d 3666
2b83a2a4
RM
3667 /* Jump backward implies we just went through the body of a
3668 loop and matched nothing. Opcode jumped to should be
3669 `on_failure_jump' or `succeed_n'. Just treat it like an
3670 ordinary jump. For a * loop, it has pushed its failure
3671 point already; if so, discard that as redundant. */
3672 if ((re_opcode_t) *p != on_failure_jump
3673 && (re_opcode_t) *p != succeed_n)
3674 continue;
3675
3676 p++;
3677 EXTRACT_NUMBER_AND_INCR (j, p);
91c7b85d
RM
3678 p += j;
3679
2b83a2a4 3680 /* If what's on the stack is where we are now, pop it. */
91c7b85d 3681 if (!FAIL_STACK_EMPTY ()
2b83a2a4
RM
3682 && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3683 fail_stack.avail--;
3684
3685 continue;
3686
3687
3688 case on_failure_jump:
3689 case on_failure_keep_string_jump:
3690 handle_on_failure_jump:
3691 EXTRACT_NUMBER_AND_INCR (j, p);
3692
3693 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3694 end of the pattern. We don't want to push such a point,
3695 since when we restore it above, entering the switch will
3696 increment `p' past the end of the pattern. We don't need
3697 to push such a point since we obviously won't find any more
3698 fastmap entries beyond `pend'. Such a pattern can match
3699 the null string, though. */
3700 if (p + j < pend)
3701 {
3702 if (!PUSH_PATTERN_OP (p + j, fail_stack))
3703 {
3704 RESET_FAIL_STACK ();
3705 return -2;
3706 }
3707 }
3708 else
3709 bufp->can_be_null = 1;
3710
3711 if (succeed_n_p)
3712 {
3713 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
3714 succeed_n_p = false;
3715 }
3716
3717 continue;
3718
3719
3720 case succeed_n:
3721 /* Get to the number of times to succeed. */
91c7b85d 3722 p += 2;
2b83a2a4
RM
3723
3724 /* Increment p past the n for when k != 0. */
3725 EXTRACT_NUMBER_AND_INCR (k, p);
3726 if (k == 0)
3727 {
3728 p -= 4;
3729 succeed_n_p = true; /* Spaghetti code alert. */
3730 goto handle_on_failure_jump;
3731 }
3732 continue;
3733
3734
3735 case set_number_at:
3736 p += 4;
3737 continue;
3738
3739
3740 case start_memory:
3741 case stop_memory:
3742 p += 2;
3743 continue;
3744
3745
3746 default:
3747 abort (); /* We have listed all the cases. */
3748 } /* switch *p++ */
3749
3750 /* Getting here means we have found the possible starting
3751 characters for one path of the pattern -- and that the empty
3752 string does not match. We need not follow this path further.
3753 Instead, look at the next alternative (remembered on the
3754 stack), or quit if no more. The test at the top of the loop
3755 does these things. */
3756 path_can_be_null = false;
3757 p = pend;
3758 } /* while p */
3759
3760 /* Set `can_be_null' for the last path (also the first path, if the
3761 pattern is empty). */
3762 bufp->can_be_null |= path_can_be_null;
3763
3764 done:
3765 RESET_FAIL_STACK ();
3766 return 0;
3767} /* re_compile_fastmap */
2ad4fab2
UD
3768#ifdef _LIBC
3769weak_alias (__re_compile_fastmap, re_compile_fastmap)
3770#endif
2b83a2a4
RM
3771\f
3772/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3773 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
3774 this memory for recording register information. STARTS and ENDS
3775 must be allocated using the malloc library routine, and must each
3776 be at least NUM_REGS * sizeof (regoff_t) bytes long.
3777
3778 If NUM_REGS == 0, then subsequent matches should allocate their own
3779 register data.
3780
3781 Unless this function is called, the first search or match using
3782 PATTERN_BUFFER will allocate its own register data, without
3783 freeing the old data. */
3784
3785void
3786re_set_registers (bufp, regs, num_regs, starts, ends)
3787 struct re_pattern_buffer *bufp;
3788 struct re_registers *regs;
3789 unsigned num_regs;
3790 regoff_t *starts, *ends;
3791{
3792 if (num_regs)
3793 {
3794 bufp->regs_allocated = REGS_REALLOCATE;
3795 regs->num_regs = num_regs;
3796 regs->start = starts;
3797 regs->end = ends;
3798 }
3799 else
3800 {
3801 bufp->regs_allocated = REGS_UNALLOCATED;
3802 regs->num_regs = 0;
3803 regs->start = regs->end = (regoff_t *) 0;
3804 }
3805}
2ad4fab2
UD
3806#ifdef _LIBC
3807weak_alias (__re_set_registers, re_set_registers)
3808#endif
2b83a2a4
RM
3809\f
3810/* Searching routines. */
3811
3812/* Like re_search_2, below, but only one string is specified, and
3813 doesn't let you say where to stop matching. */
3814
3815int
3816re_search (bufp, string, size, startpos, range, regs)
3817 struct re_pattern_buffer *bufp;
3818 const char *string;
3819 int size, startpos, range;
3820 struct re_registers *regs;
3821{
91c7b85d 3822 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2b83a2a4
RM
3823 regs, size);
3824}
2ad4fab2
UD
3825#ifdef _LIBC
3826weak_alias (__re_search, re_search)
3827#endif
2b83a2a4
RM
3828
3829
3830/* Using the compiled pattern in BUFP->buffer, first tries to match the
3831 virtual concatenation of STRING1 and STRING2, starting first at index
3832 STARTPOS, then at STARTPOS + 1, and so on.
91c7b85d 3833
2b83a2a4 3834 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
91c7b85d 3835
2b83a2a4
RM
3836 RANGE is how far to scan while trying to match. RANGE = 0 means try
3837 only at STARTPOS; in general, the last start tried is STARTPOS +
3838 RANGE.
91c7b85d 3839
2b83a2a4
RM
3840 In REGS, return the indices of the virtual concatenation of STRING1
3841 and STRING2 that matched the entire BUFP->buffer and its contained
3842 subexpressions.
91c7b85d 3843
2b83a2a4
RM
3844 Do not consider matching one past the index STOP in the virtual
3845 concatenation of STRING1 and STRING2.
3846
3847 We return either the position in the strings at which the match was
3848 found, -1 if no match, or -2 if error (such as failure
3849 stack overflow). */
3850
3851int
3852re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3853 struct re_pattern_buffer *bufp;
3854 const char *string1, *string2;
3855 int size1, size2;
3856 int startpos;
3857 int range;
3858 struct re_registers *regs;
3859 int stop;
3860{
3861 int val;
3862 register char *fastmap = bufp->fastmap;
03a75825 3863 register RE_TRANSLATE_TYPE translate = bufp->translate;
2b83a2a4
RM
3864 int total_size = size1 + size2;
3865 int endpos = startpos + range;
3866
3867 /* Check for out-of-range STARTPOS. */
3868 if (startpos < 0 || startpos > total_size)
3869 return -1;
91c7b85d 3870
2b83a2a4 3871 /* Fix up RANGE if it might eventually take us outside
57aefafe 3872 the virtual concatenation of STRING1 and STRING2.
91c7b85d 3873 Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
57aefafe
RM
3874 if (endpos < 0)
3875 range = 0 - startpos;
2b83a2a4
RM
3876 else if (endpos > total_size)
3877 range = total_size - startpos;
3878
3879 /* If the search isn't to be a backwards one, don't waste time in a
3880 search for a pattern that must be anchored. */
7cabd57c
UD
3881 if (bufp->used > 0 && range > 0
3882 && ((re_opcode_t) bufp->buffer[0] == begbuf
3883 /* `begline' is like `begbuf' if it cannot match at newlines. */
3884 || ((re_opcode_t) bufp->buffer[0] == begline
3885 && !bufp->newline_anchor)))
2b83a2a4
RM
3886 {
3887 if (startpos > 0)
3888 return -1;
3889 else
3890 range = 1;
3891 }
3892
44c8d1a2
RM
3893#ifdef emacs
3894 /* In a forward search for something that starts with \=.
3895 don't keep searching past point. */
3896 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3897 {
3898 range = PT - startpos;
3899 if (range <= 0)
3900 return -1;
3901 }
3902#endif /* emacs */
3903
2b83a2a4
RM
3904 /* Update the fastmap now if not correct already. */
3905 if (fastmap && !bufp->fastmap_accurate)
3906 if (re_compile_fastmap (bufp) == -2)
3907 return -2;
91c7b85d 3908
2b83a2a4
RM
3909 /* Loop through the string, looking for a place to start matching. */
3910 for (;;)
91c7b85d 3911 {
2b83a2a4
RM
3912 /* If a fastmap is supplied, skip quickly over characters that
3913 cannot be the start of a match. If the pattern can match the
3914 null string, however, we don't need to skip characters; we want
3915 the first null string. */
3916 if (fastmap && startpos < total_size && !bufp->can_be_null)
3917 {
3918 if (range > 0) /* Searching forwards. */
3919 {
3920 register const char *d;
3921 register int lim = 0;
3922 int irange = range;
3923
3924 if (startpos < size1 && startpos + range >= size1)
3925 lim = range - (size1 - startpos);
3926
3927 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
91c7b85d 3928
2b83a2a4
RM
3929 /* Written out as an if-else to avoid testing `translate'
3930 inside the loop. */
3931 if (translate)
3932 while (range > lim
3933 && !fastmap[(unsigned char)
3934 translate[(unsigned char) *d++]])
3935 range--;
3936 else
3937 while (range > lim && !fastmap[(unsigned char) *d++])
3938 range--;
3939
3940 startpos += irange - range;
3941 }
3942 else /* Searching backwards. */
3943 {
3944 register char c = (size1 == 0 || startpos >= size1
91c7b85d 3945 ? string2[startpos - size1]
2b83a2a4
RM
3946 : string1[startpos]);
3947
3948 if (!fastmap[(unsigned char) TRANSLATE (c)])
3949 goto advance;
3950 }
3951 }
3952
3953 /* If can't match the null string, and that's all we have left, fail. */
3954 if (range >= 0 && startpos == total_size && fastmap
3955 && !bufp->can_be_null)
3956 return -1;
3957
3958 val = re_match_2_internal (bufp, string1, size1, string2, size2,
3959 startpos, regs, stop);
3960#ifndef REGEX_MALLOC
86187531 3961# ifdef C_ALLOCA
2b83a2a4 3962 alloca (0);
86187531 3963# endif
2b83a2a4
RM
3964#endif
3965
3966 if (val >= 0)
3967 return startpos;
91c7b85d 3968
2b83a2a4
RM
3969 if (val == -2)
3970 return -2;
3971
3972 advance:
91c7b85d 3973 if (!range)
2b83a2a4 3974 break;
91c7b85d 3975 else if (range > 0)
2b83a2a4 3976 {
91c7b85d 3977 range--;
2b83a2a4
RM
3978 startpos++;
3979 }
3980 else
3981 {
91c7b85d 3982 range++;
2b83a2a4
RM
3983 startpos--;
3984 }
3985 }
3986 return -1;
3987} /* re_search_2 */
2ad4fab2
UD
3988#ifdef _LIBC
3989weak_alias (__re_search_2, re_search_2)
3990#endif
2b83a2a4 3991\f
2b83a2a4
RM
3992/* This converts PTR, a pointer into one of the search strings `string1'
3993 and `string2' into an offset from the beginning of that string. */
3994#define POINTER_TO_OFFSET(ptr) \
3995 (FIRST_STRING_P (ptr) \
3996 ? ((regoff_t) ((ptr) - string1)) \
3997 : ((regoff_t) ((ptr) - string2 + size1)))
3998
3999/* Macros for dealing with the split strings in re_match_2. */
4000
4001#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
4002
4003/* Call before fetching a character with *d. This switches over to
4004 string2 if necessary. */
4005#define PREFETCH() \
4006 while (d == dend) \
4007 { \
4008 /* End of string2 => fail. */ \
4009 if (dend == end_match_2) \
4010 goto fail; \
4011 /* End of string1 => advance to string2. */ \
4012 d = string2; \
4013 dend = end_match_2; \
4014 }
4015
4016
4017/* Test if at very beginning or at very end of the virtual concatenation
4018 of `string1' and `string2'. If only one string, it's `string2'. */
4019#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
91c7b85d 4020#define AT_STRINGS_END(d) ((d) == end2)
2b83a2a4
RM
4021
4022
4023/* Test if D points to a character which is word-constituent. We have
4024 two special cases to check for: if past the end of string1, look at
4025 the first character in string2; and if before the beginning of
4026 string2, look at the last character in string1. */
4027#define WORDCHAR_P(d) \
4028 (SYNTAX ((d) == end1 ? *string2 \
4029 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
4030 == Sword)
4031
51702635
UD
4032/* Disabled due to a compiler bug -- see comment at case wordbound */
4033#if 0
2b83a2a4
RM
4034/* Test if the character before D and the one at D differ with respect
4035 to being word-constituent. */
4036#define AT_WORD_BOUNDARY(d) \
4037 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
4038 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
51702635 4039#endif
2b83a2a4
RM
4040
4041/* Free everything we malloc. */
4042#ifdef MATCH_MAY_ALLOCATE
86187531
UD
4043# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
4044# define FREE_VARIABLES() \
2b83a2a4
RM
4045 do { \
4046 REGEX_FREE_STACK (fail_stack.stack); \
4047 FREE_VAR (regstart); \
4048 FREE_VAR (regend); \
4049 FREE_VAR (old_regstart); \
4050 FREE_VAR (old_regend); \
4051 FREE_VAR (best_regstart); \
4052 FREE_VAR (best_regend); \
4053 FREE_VAR (reg_info); \
4054 FREE_VAR (reg_dummy); \
4055 FREE_VAR (reg_info_dummy); \
4056 } while (0)
4057#else
86187531 4058# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
2b83a2a4
RM
4059#endif /* not MATCH_MAY_ALLOCATE */
4060
4061/* These values must meet several constraints. They must not be valid
4062 register values; since we have a limit of 255 registers (because
4063 we use only one byte in the pattern for the register number), we can
4064 use numbers larger than 255. They must differ by 1, because of
4065 NUM_FAILURE_ITEMS above. And the value for the lowest register must
4066 be larger than the value for the highest register, so we do not try
4067 to actually save any registers when none are active. */
4068#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4069#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4070\f
4071/* Matching routines. */
4072
4073#ifndef emacs /* Emacs never uses this. */
4074/* re_match is like re_match_2 except it takes only a single string. */
4075
4076int
4077re_match (bufp, string, size, pos, regs)
4078 struct re_pattern_buffer *bufp;
4079 const char *string;
4080 int size, pos;
4081 struct re_registers *regs;
4082{
4083 int result = re_match_2_internal (bufp, NULL, 0, string, size,
4084 pos, regs, size);
86187531
UD
4085# ifndef REGEX_MALLOC
4086# ifdef C_ALLOCA
2b83a2a4 4087 alloca (0);
86187531
UD
4088# endif
4089# endif
2b83a2a4
RM
4090 return result;
4091}
2ad4fab2
UD
4092# ifdef _LIBC
4093weak_alias (__re_match, re_match)
4094# endif
2b83a2a4
RM
4095#endif /* not emacs */
4096
4cca6b86
UD
4097static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
4098 unsigned char *end,
4099 register_info_type *reg_info));
4100static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
4101 unsigned char *end,
4102 register_info_type *reg_info));
4103static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
4104 unsigned char *end,
4105 register_info_type *reg_info));
4106static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
4107 int len, char *translate));
2b83a2a4
RM
4108
4109/* re_match_2 matches the compiled pattern in BUFP against the
4110 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4111 and SIZE2, respectively). We start matching at POS, and stop
4112 matching at STOP.
91c7b85d 4113
2b83a2a4
RM
4114 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4115 store offsets for the substring each group matched in REGS. See the
4116 documentation for exactly how many groups we fill.
4117
4118 We return -1 if no match, -2 if an internal error (such as the
4119 failure stack overflowing). Otherwise, we return the length of the
4120 matched substring. */
4121
4122int
4123re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4124 struct re_pattern_buffer *bufp;
4125 const char *string1, *string2;
4126 int size1, size2;
4127 int pos;
4128 struct re_registers *regs;
4129 int stop;
4130{
4131 int result = re_match_2_internal (bufp, string1, size1, string2, size2,
4132 pos, regs, stop);
4cca6b86 4133#ifndef REGEX_MALLOC
86187531 4134# ifdef C_ALLOCA
2b83a2a4 4135 alloca (0);
86187531 4136# endif
4cca6b86 4137#endif
2b83a2a4
RM
4138 return result;
4139}
2ad4fab2
UD
4140#ifdef _LIBC
4141weak_alias (__re_match_2, re_match_2)
4142#endif
2b83a2a4
RM
4143
4144/* This is a separate function so that we can force an alloca cleanup
4145 afterwards. */
4146static int
4147re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4148 struct re_pattern_buffer *bufp;
4149 const char *string1, *string2;
4150 int size1, size2;
4151 int pos;
4152 struct re_registers *regs;
4153 int stop;
4154{
4155 /* General temporaries. */
4156 int mcnt;
4157 unsigned char *p1;
4158
4159 /* Just past the end of the corresponding string. */
4160 const char *end1, *end2;
4161
4162 /* Pointers into string1 and string2, just past the last characters in
4163 each to consider matching. */
4164 const char *end_match_1, *end_match_2;
4165
4166 /* Where we are in the data, and the end of the current string. */
4167 const char *d, *dend;
91c7b85d 4168
2b83a2a4
RM
4169 /* Where we are in the pattern, and the end of the pattern. */
4170 unsigned char *p = bufp->buffer;
4171 register unsigned char *pend = p + bufp->used;
4172
4173 /* Mark the opcode just after a start_memory, so we can test for an
4174 empty subpattern when we get to the stop_memory. */
4175 unsigned char *just_past_start_mem = 0;
4176
4177 /* We use this to map every character in the string. */
03a75825 4178 RE_TRANSLATE_TYPE translate = bufp->translate;
2b83a2a4
RM
4179
4180 /* Failure point stack. Each place that can handle a failure further
4181 down the line pushes a failure point on this stack. It consists of
4182 restart, regend, and reg_info for all registers corresponding to
4183 the subexpressions we're currently inside, plus the number of such
4184 registers, and, finally, two char *'s. The first char * is where
4185 to resume scanning the pattern; the second one is where to resume
4186 scanning the strings. If the latter is zero, the failure point is
4187 a ``dummy''; if a failure happens and the failure point is a dummy,
4188 it gets discarded and the next next one is tried. */
4189#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4190 fail_stack_type fail_stack;
4191#endif
4192#ifdef DEBUG
c4563d2d 4193 static unsigned failure_id;
2b83a2a4
RM
4194 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4195#endif
4196
4cca6b86 4197#ifdef REL_ALLOC
2b83a2a4
RM
4198 /* This holds the pointer to the failure stack, when
4199 it is allocated relocatably. */
4200 fail_stack_elt_t *failure_stack_ptr;
4cca6b86 4201#endif
2b83a2a4
RM
4202
4203 /* We fill all the registers internally, independent of what we
4204 return, for use in backreferences. The number here includes
4205 an element for register zero. */
4cca6b86 4206 size_t num_regs = bufp->re_nsub + 1;
91c7b85d 4207
2b83a2a4 4208 /* The currently active registers. */
4cca6b86
UD
4209 active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4210 active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
2b83a2a4
RM
4211
4212 /* Information on the contents of registers. These are pointers into
4213 the input strings; they record just what was matched (on this
4214 attempt) by a subexpression part of the pattern, that is, the
4215 regnum-th regstart pointer points to where in the pattern we began
4216 matching and the regnum-th regend points to right after where we
4217 stopped matching the regnum-th subexpression. (The zeroth register
4218 keeps track of what the whole pattern matches.) */
4219#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4220 const char **regstart, **regend;
4221#endif
4222
4223 /* If a group that's operated upon by a repetition operator fails to
4224 match anything, then the register for its start will need to be
4225 restored because it will have been set to wherever in the string we
4226 are when we last see its open-group operator. Similarly for a
4227 register's end. */
4228#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4229 const char **old_regstart, **old_regend;
4230#endif
4231
4232 /* The is_active field of reg_info helps us keep track of which (possibly
4233 nested) subexpressions we are currently in. The matched_something
4234 field of reg_info[reg_num] helps us tell whether or not we have
4235 matched any of the pattern so far this time through the reg_num-th
4236 subexpression. These two fields get reset each time through any
4237 loop their register is in. */
4238#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
91c7b85d 4239 register_info_type *reg_info;
2b83a2a4
RM
4240#endif
4241
4242 /* The following record the register info as found in the above
91c7b85d 4243 variables when we find a match better than any we've seen before.
2b83a2a4
RM
4244 This happens as we backtrack through the failure points, which in
4245 turn happens only if we have not yet matched the entire string. */
4246 unsigned best_regs_set = false;
4247#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4248 const char **best_regstart, **best_regend;
4249#endif
91c7b85d 4250
2b83a2a4
RM
4251 /* Logically, this is `best_regend[0]'. But we don't want to have to
4252 allocate space for that if we're not allocating space for anything
4253 else (see below). Also, we never need info about register 0 for
4254 any of the other register vectors, and it seems rather a kludge to
4255 treat `best_regend' differently than the rest. So we keep track of
4256 the end of the best match so far in a separate variable. We
4257 initialize this to NULL so that when we backtrack the first time
4258 and need to test it, it's not garbage. */
4259 const char *match_end = NULL;
4260
4261 /* This helps SET_REGS_MATCHED avoid doing redundant work. */
4262 int set_regs_matched_done = 0;
4263
4264 /* Used when we pop values we don't care about. */
4265#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4266 const char **reg_dummy;
4267 register_info_type *reg_info_dummy;
4268#endif
4269
4270#ifdef DEBUG
4271 /* Counts the total number of registers pushed. */
91c7b85d 4272 unsigned num_regs_pushed = 0;
2b83a2a4
RM
4273#endif
4274
4275 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
91c7b85d 4276
2b83a2a4 4277 INIT_FAIL_STACK ();
91c7b85d 4278
2b83a2a4
RM
4279#ifdef MATCH_MAY_ALLOCATE
4280 /* Do not bother to initialize all the register variables if there are
4281 no groups in the pattern, as it takes a fair amount of time. If
4282 there are groups, we include space for register 0 (the whole
4283 pattern), even though we never use it, since it simplifies the
4284 array indexing. We should fix this. */
4285 if (bufp->re_nsub)
4286 {
4287 regstart = REGEX_TALLOC (num_regs, const char *);
4288 regend = REGEX_TALLOC (num_regs, const char *);
4289 old_regstart = REGEX_TALLOC (num_regs, const char *);
4290 old_regend = REGEX_TALLOC (num_regs, const char *);
4291 best_regstart = REGEX_TALLOC (num_regs, const char *);
4292 best_regend = REGEX_TALLOC (num_regs, const char *);
4293 reg_info = REGEX_TALLOC (num_regs, register_info_type);
4294 reg_dummy = REGEX_TALLOC (num_regs, const char *);
4295 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4296
91c7b85d
RM
4297 if (!(regstart && regend && old_regstart && old_regend && reg_info
4298 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
2b83a2a4
RM
4299 {
4300 FREE_VARIABLES ();
4301 return -2;
4302 }
4303 }
4304 else
4305 {
4306 /* We must initialize all our variables to NULL, so that
4307 `FREE_VARIABLES' doesn't try to free them. */
4308 regstart = regend = old_regstart = old_regend = best_regstart
4309 = best_regend = reg_dummy = NULL;
4310 reg_info = reg_info_dummy = (register_info_type *) NULL;
4311 }
4312#endif /* MATCH_MAY_ALLOCATE */
4313
4314 /* The starting position is bogus. */
4315 if (pos < 0 || pos > size1 + size2)
4316 {
4317 FREE_VARIABLES ();
4318 return -1;
4319 }
91c7b85d 4320
2b83a2a4
RM
4321 /* Initialize subexpression text positions to -1 to mark ones that no
4322 start_memory/stop_memory has been seen for. Also initialize the
4323 register information struct. */
cccda09f 4324 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
2b83a2a4 4325 {
91c7b85d 4326 regstart[mcnt] = regend[mcnt]
2b83a2a4 4327 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
91c7b85d 4328
2b83a2a4
RM
4329 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4330 IS_ACTIVE (reg_info[mcnt]) = 0;
4331 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4332 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4333 }
91c7b85d 4334
2b83a2a4
RM
4335 /* We move `string1' into `string2' if the latter's empty -- but not if
4336 `string1' is null. */
4337 if (size2 == 0 && string1 != NULL)
4338 {
4339 string2 = string1;
4340 size2 = size1;
4341 string1 = 0;
4342 size1 = 0;
4343 }
4344 end1 = string1 + size1;
4345 end2 = string2 + size2;
4346
4347 /* Compute where to stop matching, within the two strings. */
4348 if (stop <= size1)
4349 {
4350 end_match_1 = string1 + stop;
4351 end_match_2 = string2;
4352 }
4353 else
4354 {
4355 end_match_1 = end1;
4356 end_match_2 = string2 + stop - size1;
4357 }
4358
91c7b85d 4359 /* `p' scans through the pattern as `d' scans through the data.
2b83a2a4
RM
4360 `dend' is the end of the input string that `d' points within. `d'
4361 is advanced into the following input string whenever necessary, but
4362 this happens before fetching; therefore, at the beginning of the
4363 loop, `d' can be pointing at the end of a string, but it cannot
4364 equal `string2'. */
4365 if (size1 > 0 && pos <= size1)
4366 {
4367 d = string1 + pos;
4368 dend = end_match_1;
4369 }
4370 else
4371 {
4372 d = string2 + pos - size1;
4373 dend = end_match_2;
4374 }
4375
5929563f 4376 DEBUG_PRINT1 ("The compiled pattern is:\n");
2b83a2a4
RM
4377 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4378 DEBUG_PRINT1 ("The string to match is: `");
4379 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4380 DEBUG_PRINT1 ("'\n");
91c7b85d 4381
2b83a2a4
RM
4382 /* This loops over pattern commands. It exits by returning from the
4383 function if the match is complete, or it drops through if the match
4384 fails at this starting point in the input data. */
4385 for (;;)
4386 {
5929563f
UD
4387#ifdef _LIBC
4388 DEBUG_PRINT2 ("\n%p: ", p);
4389#else
2b83a2a4 4390 DEBUG_PRINT2 ("\n0x%x: ", p);
5929563f 4391#endif
2b83a2a4
RM
4392
4393 if (p == pend)
4394 { /* End of pattern means we might have succeeded. */
4395 DEBUG_PRINT1 ("end of pattern ... ");
91c7b85d 4396
2b83a2a4
RM
4397 /* If we haven't matched the entire string, and we want the
4398 longest match, try backtracking. */
4399 if (d != end_match_2)
4400 {
4401 /* 1 if this match ends in the same string (string1 or string2)
4402 as the best previous match. */
91c7b85d 4403 boolean same_str_p = (FIRST_STRING_P (match_end)
2b83a2a4
RM
4404 == MATCHING_IN_FIRST_STRING);
4405 /* 1 if this match is the best seen so far. */
4406 boolean best_match_p;
4407
4408 /* AIX compiler got confused when this was combined
4409 with the previous declaration. */
4410 if (same_str_p)
4411 best_match_p = d > match_end;
4412 else
4413 best_match_p = !MATCHING_IN_FIRST_STRING;
4414
4415 DEBUG_PRINT1 ("backtracking.\n");
91c7b85d 4416
2b83a2a4
RM
4417 if (!FAIL_STACK_EMPTY ())
4418 { /* More failure points to try. */
4419
4420 /* If exceeds best match so far, save it. */
4421 if (!best_regs_set || best_match_p)
4422 {
4423 best_regs_set = true;
4424 match_end = d;
91c7b85d 4425
2b83a2a4 4426 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
91c7b85d 4427
cccda09f 4428 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
2b83a2a4
RM
4429 {
4430 best_regstart[mcnt] = regstart[mcnt];
4431 best_regend[mcnt] = regend[mcnt];
4432 }
4433 }
91c7b85d 4434 goto fail;
2b83a2a4
RM
4435 }
4436
4437 /* If no failure points, don't restore garbage. And if
4438 last match is real best match, don't restore second
4439 best one. */
4440 else if (best_regs_set && !best_match_p)
4441 {
4442 restore_best_regs:
4443 /* Restore best match. It may happen that `dend ==
4444 end_match_1' while the restored d is in string2.
4445 For example, the pattern `x.*y.*z' against the
4446 strings `x-' and `y-z-', if the two strings are
4447 not consecutive in memory. */
4448 DEBUG_PRINT1 ("Restoring best registers.\n");
91c7b85d 4449
2b83a2a4
RM
4450 d = match_end;
4451 dend = ((d >= string1 && d <= end1)
4452 ? end_match_1 : end_match_2);
4453
cccda09f 4454 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
2b83a2a4
RM
4455 {
4456 regstart[mcnt] = best_regstart[mcnt];
4457 regend[mcnt] = best_regend[mcnt];
4458 }
4459 }
4460 } /* d != end_match_2 */
4461
4462 succeed_label:
4463 DEBUG_PRINT1 ("Accepting match.\n");
4464
4465 /* If caller wants register contents data back, do it. */
4466 if (regs && !bufp->no_sub)
4467 {
4468 /* Have the register data arrays been allocated? */
4469 if (bufp->regs_allocated == REGS_UNALLOCATED)
4470 { /* No. So allocate them with malloc. We need one
4471 extra element beyond `num_regs' for the `-1' marker
4472 GNU code uses. */
4473 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4474 regs->start = TALLOC (regs->num_regs, regoff_t);
4475 regs->end = TALLOC (regs->num_regs, regoff_t);
4476 if (regs->start == NULL || regs->end == NULL)
4477 {
4478 FREE_VARIABLES ();
4479 return -2;
4480 }
4481 bufp->regs_allocated = REGS_REALLOCATE;
4482 }
4483 else if (bufp->regs_allocated == REGS_REALLOCATE)
4484 { /* Yes. If we need more elements than were already
4485 allocated, reallocate them. If we need fewer, just
4486 leave it alone. */
4487 if (regs->num_regs < num_regs + 1)
4488 {
4489 regs->num_regs = num_regs + 1;
4490 RETALLOC (regs->start, regs->num_regs, regoff_t);
4491 RETALLOC (regs->end, regs->num_regs, regoff_t);
4492 if (regs->start == NULL || regs->end == NULL)
4493 {
4494 FREE_VARIABLES ();
4495 return -2;
4496 }
4497 }
4498 }
4499 else
4500 {
4501 /* These braces fend off a "empty body in an else-statement"
4502 warning under GCC when assert expands to nothing. */
4503 assert (bufp->regs_allocated == REGS_FIXED);
4504 }
4505
4506 /* Convert the pointer data in `regstart' and `regend' to
4507 indices. Register zero has to be set differently,
4508 since we haven't kept track of any info for it. */
4509 if (regs->num_regs > 0)
4510 {
4511 regs->start[0] = pos;
4512 regs->end[0] = (MATCHING_IN_FIRST_STRING
4513 ? ((regoff_t) (d - string1))
4514 : ((regoff_t) (d - string2 + size1)));
4515 }
91c7b85d 4516
2b83a2a4
RM
4517 /* Go through the first `min (num_regs, regs->num_regs)'
4518 registers, since that is all we initialized. */
cccda09f
UD
4519 for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4520 mcnt++)
2b83a2a4
RM
4521 {
4522 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4523 regs->start[mcnt] = regs->end[mcnt] = -1;
4524 else
4525 {
4526 regs->start[mcnt]
4527 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4528 regs->end[mcnt]
4529 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4530 }
4531 }
91c7b85d 4532
2b83a2a4
RM
4533 /* If the regs structure we return has more elements than
4534 were in the pattern, set the extra elements to -1. If
4535 we (re)allocated the registers, this is the case,
4536 because we always allocate enough to have at least one
4537 -1 at the end. */
cccda09f 4538 for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
2b83a2a4
RM
4539 regs->start[mcnt] = regs->end[mcnt] = -1;
4540 } /* regs && !bufp->no_sub */
4541
4542 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4543 nfailure_points_pushed, nfailure_points_popped,
4544 nfailure_points_pushed - nfailure_points_popped);
4545 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4546
91c7b85d
RM
4547 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4548 ? string1
2b83a2a4
RM
4549 : string2 - size1);
4550
4551 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4552
4553 FREE_VARIABLES ();
4554 return mcnt;
4555 }
4556
4557 /* Otherwise match next pattern command. */
4558 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4559 {
4560 /* Ignore these. Used to ignore the n of succeed_n's which
4561 currently have n == 0. */
4562 case no_op:
4563 DEBUG_PRINT1 ("EXECUTING no_op.\n");
4564 break;
4565
4566 case succeed:
4567 DEBUG_PRINT1 ("EXECUTING succeed.\n");
4568 goto succeed_label;
4569
4570 /* Match the next n pattern characters exactly. The following
4571 byte in the pattern defines n, and the n bytes after that
4572 are the characters to match. */
4573 case exactn:
4574 mcnt = *p++;
4575 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4576
4577 /* This is written out as an if-else so we don't waste time
4578 testing `translate' inside the loop. */
4579 if (translate)
4580 {
4581 do
4582 {
4583 PREFETCH ();
03a75825
RM
4584 if ((unsigned char) translate[(unsigned char) *d++]
4585 != (unsigned char) *p++)
2b83a2a4
RM
4586 goto fail;
4587 }
4588 while (--mcnt);
4589 }
4590 else
4591 {
4592 do
4593 {
4594 PREFETCH ();
4595 if (*d++ != (char) *p++) goto fail;
4596 }
4597 while (--mcnt);
4598 }
4599 SET_REGS_MATCHED ();
4600 break;
4601
4602
4603 /* Match any character except possibly a newline or a null. */
4604 case anychar:
4605 DEBUG_PRINT1 ("EXECUTING anychar.\n");
4606
4607 PREFETCH ();
4608
4609 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4610 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4611 goto fail;
4612
4613 SET_REGS_MATCHED ();
4614 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
4615 d++;
4616 break;
4617
4618
4619 case charset:
4620 case charset_not:
4621 {
4622 register unsigned char c;
4623 boolean not = (re_opcode_t) *(p - 1) == charset_not;
4624
4625 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4626
4627 PREFETCH ();
4628 c = TRANSLATE (*d); /* The character to match. */
4629
4630 /* Cast to `unsigned' instead of `unsigned char' in case the
4631 bit list is a full 32 bytes long. */
4632 if (c < (unsigned) (*p * BYTEWIDTH)
4633 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4634 not = !not;
4635
4636 p += 1 + *p;
4637
4638 if (!not) goto fail;
91c7b85d 4639
2b83a2a4
RM
4640 SET_REGS_MATCHED ();
4641 d++;
4642 break;
4643 }
4644
4645
4646 /* The beginning of a group is represented by start_memory.
4647 The arguments are the register number in the next byte, and the
4648 number of groups inner to this one in the next. The text
4649 matched within the group is recorded (in the internal
4650 registers data structure) under the register number. */
4651 case start_memory:
4652 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4653
4654 /* Find out if this group can match the empty string. */
4655 p1 = p; /* To send to group_match_null_string_p. */
91c7b85d 4656
2b83a2a4 4657 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
91c7b85d 4658 REG_MATCH_NULL_STRING_P (reg_info[*p])
2b83a2a4
RM
4659 = group_match_null_string_p (&p1, pend, reg_info);
4660
4661 /* Save the position in the string where we were the last time
4662 we were at this open-group operator in case the group is
4663 operated upon by a repetition operator, e.g., with `(a*)*b'
4664 against `ab'; then we want to ignore where we are now in
4665 the string in case this attempt to match fails. */
4666 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4667 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4668 : regstart[*p];
91c7b85d 4669 DEBUG_PRINT2 (" old_regstart: %d\n",
2b83a2a4
RM
4670 POINTER_TO_OFFSET (old_regstart[*p]));
4671
4672 regstart[*p] = d;
4673 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4674
4675 IS_ACTIVE (reg_info[*p]) = 1;
4676 MATCHED_SOMETHING (reg_info[*p]) = 0;
4677
4678 /* Clear this whenever we change the register activity status. */
4679 set_regs_matched_done = 0;
91c7b85d 4680
2b83a2a4
RM
4681 /* This is the new highest active register. */
4682 highest_active_reg = *p;
91c7b85d 4683
2b83a2a4
RM
4684 /* If nothing was active before, this is the new lowest active
4685 register. */
4686 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4687 lowest_active_reg = *p;
4688
4689 /* Move past the register number and inner group count. */
4690 p += 2;
4691 just_past_start_mem = p;
4692
4693 break;
4694
4695
4696 /* The stop_memory opcode represents the end of a group. Its
4697 arguments are the same as start_memory's: the register
4698 number, and the number of inner groups. */
4699 case stop_memory:
4700 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
91c7b85d 4701
2b83a2a4
RM
4702 /* We need to save the string position the last time we were at
4703 this close-group operator in case the group is operated
4704 upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4705 against `aba'; then we want to ignore where we are now in
4706 the string in case this attempt to match fails. */
4707 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4708 ? REG_UNSET (regend[*p]) ? d : regend[*p]
4709 : regend[*p];
91c7b85d 4710 DEBUG_PRINT2 (" old_regend: %d\n",
2b83a2a4
RM
4711 POINTER_TO_OFFSET (old_regend[*p]));
4712
4713 regend[*p] = d;
4714 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4715
4716 /* This register isn't active anymore. */
4717 IS_ACTIVE (reg_info[*p]) = 0;
4718
4719 /* Clear this whenever we change the register activity status. */
4720 set_regs_matched_done = 0;
4721
4722 /* If this was the only register active, nothing is active
4723 anymore. */
4724 if (lowest_active_reg == highest_active_reg)
4725 {
4726 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4727 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4728 }
4729 else
4730 { /* We must scan for the new highest active register, since
4731 it isn't necessarily one less than now: consider
4732 (a(b)c(d(e)f)g). When group 3 ends, after the f), the
4733 new highest active register is 1. */
4734 unsigned char r = *p - 1;
4735 while (r > 0 && !IS_ACTIVE (reg_info[r]))
4736 r--;
91c7b85d 4737
2b83a2a4
RM
4738 /* If we end up at register zero, that means that we saved
4739 the registers as the result of an `on_failure_jump', not
4740 a `start_memory', and we jumped to past the innermost
4741 `stop_memory'. For example, in ((.)*) we save
4742 registers 1 and 2 as a result of the *, but when we pop
4743 back to the second ), we are at the stop_memory 1.
4744 Thus, nothing is active. */
4745 if (r == 0)
4746 {
4747 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4748 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4749 }
4750 else
4751 highest_active_reg = r;
4752 }
91c7b85d 4753
2b83a2a4
RM
4754 /* If just failed to match something this time around with a
4755 group that's operated on by a repetition operator, try to
4756 force exit from the ``loop'', and restore the register
4757 information for this group that we had before trying this
4758 last match. */
4759 if ((!MATCHED_SOMETHING (reg_info[*p])
4760 || just_past_start_mem == p - 1)
91c7b85d 4761 && (p + 2) < pend)
2b83a2a4
RM
4762 {
4763 boolean is_a_jump_n = false;
91c7b85d 4764
2b83a2a4
RM
4765 p1 = p + 2;
4766 mcnt = 0;
4767 switch ((re_opcode_t) *p1++)
4768 {
4769 case jump_n:
4770 is_a_jump_n = true;
4771 case pop_failure_jump:
4772 case maybe_pop_jump:
4773 case jump:
4774 case dummy_failure_jump:
4775 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4776 if (is_a_jump_n)
4777 p1 += 2;
4778 break;
91c7b85d 4779
2b83a2a4
RM
4780 default:
4781 /* do nothing */ ;
4782 }
4783 p1 += mcnt;
91c7b85d 4784
2b83a2a4
RM
4785 /* If the next operation is a jump backwards in the pattern
4786 to an on_failure_jump right before the start_memory
4787 corresponding to this stop_memory, exit from the loop
4788 by forcing a failure after pushing on the stack the
4789 on_failure_jump's jump in the pattern, and d. */
4790 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4791 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4792 {
4793 /* If this group ever matched anything, then restore
4794 what its registers were before trying this last
4795 failed match, e.g., with `(a*)*b' against `ab' for
4796 regstart[1], and, e.g., with `((a*)*(b*)*)*'
4797 against `aba' for regend[3].
91c7b85d 4798
2b83a2a4
RM
4799 Also restore the registers for inner groups for,
4800 e.g., `((a*)(b*))*' against `aba' (register 3 would
4801 otherwise get trashed). */
91c7b85d 4802
2b83a2a4
RM
4803 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4804 {
91c7b85d
RM
4805 unsigned r;
4806
2b83a2a4 4807 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
91c7b85d 4808
2b83a2a4 4809 /* Restore this and inner groups' (if any) registers. */
cccda09f
UD
4810 for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4811 r++)
2b83a2a4
RM
4812 {
4813 regstart[r] = old_regstart[r];
4814
4815 /* xx why this test? */
4816 if (old_regend[r] >= regstart[r])
4817 regend[r] = old_regend[r];
91c7b85d 4818 }
2b83a2a4
RM
4819 }
4820 p1++;
4821 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4822 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4823
4824 goto fail;
4825 }
4826 }
91c7b85d 4827
2b83a2a4
RM
4828 /* Move past the register number and the inner group count. */
4829 p += 2;
4830 break;
4831
4832
4833 /* \<digit> has been turned into a `duplicate' command which is
4834 followed by the numeric value of <digit> as the register number. */
4835 case duplicate:
4836 {
4837 register const char *d2, *dend2;
4838 int regno = *p++; /* Get which register to match against. */
4839 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4840
4841 /* Can't back reference a group which we've never matched. */
4842 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4843 goto fail;
91c7b85d 4844
2b83a2a4
RM
4845 /* Where in input to try to start matching. */
4846 d2 = regstart[regno];
91c7b85d 4847
2b83a2a4
RM
4848 /* Where to stop matching; if both the place to start and
4849 the place to stop matching are in the same string, then
4850 set to the place to stop, otherwise, for now have to use
4851 the end of the first string. */
4852
91c7b85d 4853 dend2 = ((FIRST_STRING_P (regstart[regno])
2b83a2a4
RM
4854 == FIRST_STRING_P (regend[regno]))
4855 ? regend[regno] : end_match_1);
4856 for (;;)
4857 {
4858 /* If necessary, advance to next segment in register
4859 contents. */
4860 while (d2 == dend2)
4861 {
4862 if (dend2 == end_match_2) break;
4863 if (dend2 == regend[regno]) break;
4864
4865 /* End of string1 => advance to string2. */
4866 d2 = string2;
4867 dend2 = regend[regno];
4868 }
4869 /* At end of register contents => success */
4870 if (d2 == dend2) break;
4871
4872 /* If necessary, advance to next segment in data. */
4873 PREFETCH ();
4874
4875 /* How many characters left in this segment to match. */
4876 mcnt = dend - d;
91c7b85d 4877
2b83a2a4
RM
4878 /* Want how many consecutive characters we can match in
4879 one shot, so, if necessary, adjust the count. */
4880 if (mcnt > dend2 - d2)
4881 mcnt = dend2 - d2;
91c7b85d 4882
2b83a2a4
RM
4883 /* Compare that many; failure if mismatch, else move
4884 past them. */
91c7b85d
RM
4885 if (translate
4886 ? bcmp_translate (d, d2, mcnt, translate)
86187531 4887 : memcmp (d, d2, mcnt))
2b83a2a4
RM
4888 goto fail;
4889 d += mcnt, d2 += mcnt;
4890
4891 /* Do this because we've match some characters. */
4892 SET_REGS_MATCHED ();
4893 }
4894 }
4895 break;
4896
4897
4898 /* begline matches the empty string at the beginning of the string
4899 (unless `not_bol' is set in `bufp'), and, if
4900 `newline_anchor' is set, after newlines. */
4901 case begline:
4902 DEBUG_PRINT1 ("EXECUTING begline.\n");
91c7b85d 4903
2b83a2a4
RM
4904 if (AT_STRINGS_BEG (d))
4905 {
4906 if (!bufp->not_bol) break;
4907 }
4908 else if (d[-1] == '\n' && bufp->newline_anchor)
4909 {
4910 break;
4911 }
4912 /* In all other cases, we fail. */
4913 goto fail;
4914
4915
4916 /* endline is the dual of begline. */
4917 case endline:
4918 DEBUG_PRINT1 ("EXECUTING endline.\n");
4919
4920 if (AT_STRINGS_END (d))
4921 {
4922 if (!bufp->not_eol) break;
4923 }
91c7b85d 4924
2b83a2a4
RM
4925 /* We have to ``prefetch'' the next character. */
4926 else if ((d == end1 ? *string2 : *d) == '\n'
4927 && bufp->newline_anchor)
4928 {
4929 break;
4930 }
4931 goto fail;
4932
4933
4934 /* Match at the very beginning of the data. */
4935 case begbuf:
4936 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4937 if (AT_STRINGS_BEG (d))
4938 break;
4939 goto fail;
4940
4941
4942 /* Match at the very end of the data. */
4943 case endbuf:
4944 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4945 if (AT_STRINGS_END (d))
4946 break;
4947 goto fail;
4948
4949
4950 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
4951 pushes NULL as the value for the string on the stack. Then
4952 `pop_failure_point' will keep the current value for the
4953 string, instead of restoring it. To see why, consider
4954 matching `foo\nbar' against `.*\n'. The .* matches the foo;
4955 then the . fails against the \n. But the next thing we want
4956 to do is match the \n against the \n; if we restored the
4957 string value, we would be back at the foo.
91c7b85d 4958
2b83a2a4
RM
4959 Because this is used only in specific cases, we don't need to
4960 check all the things that `on_failure_jump' does, to make
4961 sure the right things get saved on the stack. Hence we don't
4962 share its code. The only reason to push anything on the
4963 stack at all is that otherwise we would have to change
4964 `anychar's code to do something besides goto fail in this
4965 case; that seems worse than this. */
4966 case on_failure_keep_string_jump:
4967 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
91c7b85d 4968
2b83a2a4 4969 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5929563f
UD
4970#ifdef _LIBC
4971 DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4972#else
2b83a2a4 4973 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
5929563f 4974#endif
2b83a2a4
RM
4975
4976 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4977 break;
4978
4979
4980 /* Uses of on_failure_jump:
91c7b85d 4981
2b83a2a4
RM
4982 Each alternative starts with an on_failure_jump that points
4983 to the beginning of the next alternative. Each alternative
4984 except the last ends with a jump that in effect jumps past
4985 the rest of the alternatives. (They really jump to the
4986 ending jump of the following alternative, because tensioning
4987 these jumps is a hassle.)
4988
4989 Repeats start with an on_failure_jump that points past both
4990 the repetition text and either the following jump or
4991 pop_failure_jump back to this on_failure_jump. */
4992 case on_failure_jump:
4993 on_failure:
4994 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4995
4996 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5929563f
UD
4997#ifdef _LIBC
4998 DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4999#else
2b83a2a4 5000 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
5929563f 5001#endif
2b83a2a4
RM
5002
5003 /* If this on_failure_jump comes right before a group (i.e.,
5004 the original * applied to a group), save the information
5005 for that group and all inner ones, so that if we fail back
5006 to this point, the group's information will be correct.
5007 For example, in \(a*\)*\1, we need the preceding group,
8e3cc80f 5008 and in \(zz\(a*\)b*\)\2, we need the inner group. */
2b83a2a4
RM
5009
5010 /* We can't use `p' to check ahead because we push
5011 a failure point to `p + mcnt' after we do this. */
5012 p1 = p;
5013
5014 /* We need to skip no_op's before we look for the
5015 start_memory in case this on_failure_jump is happening as
5016 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
5017 against aba. */
5018 while (p1 < pend && (re_opcode_t) *p1 == no_op)
5019 p1++;
5020
5021 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
5022 {
5023 /* We have a new highest active register now. This will
5024 get reset at the start_memory we are about to get to,
5025 but we will have saved all the registers relevant to
5026 this repetition op, as described above. */
5027 highest_active_reg = *(p1 + 1) + *(p1 + 2);
5028 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5029 lowest_active_reg = *(p1 + 1);
5030 }
5031
5032 DEBUG_PRINT1 (":\n");
5033 PUSH_FAILURE_POINT (p + mcnt, d, -2);
5034 break;
5035
5036
5037 /* A smart repeat ends with `maybe_pop_jump'.
5038 We change it to either `pop_failure_jump' or `jump'. */
5039 case maybe_pop_jump:
5040 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5041 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
5042 {
5043 register unsigned char *p2 = p;
5044
5045 /* Compare the beginning of the repeat with what in the
5046 pattern follows its end. If we can establish that there
5047 is nothing that they would both match, i.e., that we
5048 would have to backtrack because of (as in, e.g., `a*a')
5049 then we can change to pop_failure_jump, because we'll
5050 never have to backtrack.
91c7b85d 5051
2b83a2a4
RM
5052 This is not true in the case of alternatives: in
5053 `(a|ab)*' we do need to backtrack to the `ab' alternative
5054 (e.g., if the string was `ab'). But instead of trying to
5055 detect that here, the alternative has put on a dummy
5056 failure point which is what we will end up popping. */
5057
5058 /* Skip over open/close-group commands.
5059 If what follows this loop is a ...+ construct,
5060 look at what begins its body, since we will have to
5061 match at least one of that. */
5062 while (1)
5063 {
5064 if (p2 + 2 < pend
5065 && ((re_opcode_t) *p2 == stop_memory
5066 || (re_opcode_t) *p2 == start_memory))
5067 p2 += 3;
5068 else if (p2 + 6 < pend
5069 && (re_opcode_t) *p2 == dummy_failure_jump)
5070 p2 += 6;
5071 else
5072 break;
5073 }
5074
5075 p1 = p + mcnt;
5076 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
91c7b85d 5077 to the `maybe_finalize_jump' of this case. Examine what
2b83a2a4
RM
5078 follows. */
5079
5080 /* If we're at the end of the pattern, we can change. */
5081 if (p2 == pend)
5082 {
5083 /* Consider what happens when matching ":\(.*\)"
5084 against ":/". I don't really understand this code
5085 yet. */
5086 p[-3] = (unsigned char) pop_failure_jump;
5087 DEBUG_PRINT1
5088 (" End of pattern: change to `pop_failure_jump'.\n");
5089 }
5090
5091 else if ((re_opcode_t) *p2 == exactn
5092 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
5093 {
5094 register unsigned char c
5095 = *p2 == (unsigned char) endline ? '\n' : p2[2];
5096
5097 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
5098 {
5099 p[-3] = (unsigned char) pop_failure_jump;
5100 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
5101 c, p1[5]);
5102 }
91c7b85d 5103
2b83a2a4
RM
5104 else if ((re_opcode_t) p1[3] == charset
5105 || (re_opcode_t) p1[3] == charset_not)
5106 {
5107 int not = (re_opcode_t) p1[3] == charset_not;
91c7b85d 5108
2b83a2a4
RM
5109 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
5110 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5111 not = !not;
5112
5113 /* `not' is equal to 1 if c would match, which means
5114 that we can't change to pop_failure_jump. */
5115 if (!not)
5116 {
5117 p[-3] = (unsigned char) pop_failure_jump;
5118 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5119 }
5120 }
5121 }
5122 else if ((re_opcode_t) *p2 == charset)
5123 {
539491ac
UD
5124 /* We win if the first character of the loop is not part
5125 of the charset. */
5126 if ((re_opcode_t) p1[3] == exactn
5127 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
5128 && (p2[2 + p1[5] / BYTEWIDTH]
5129 & (1 << (p1[5] % BYTEWIDTH)))))
a2860282 5130 {
539491ac
UD
5131 p[-3] = (unsigned char) pop_failure_jump;
5132 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
2b83a2a4 5133 }
91c7b85d 5134
2b83a2a4
RM
5135 else if ((re_opcode_t) p1[3] == charset_not)
5136 {
5137 int idx;
5138 /* We win if the charset_not inside the loop
5139 lists every character listed in the charset after. */
5140 for (idx = 0; idx < (int) p2[1]; idx++)
5141 if (! (p2[2 + idx] == 0
5142 || (idx < (int) p1[4]
5143 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
5144 break;
5145
5146 if (idx == p2[1])
5147 {
5148 p[-3] = (unsigned char) pop_failure_jump;
5149 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5150 }
5151 }
5152 else if ((re_opcode_t) p1[3] == charset)
5153 {
5154 int idx;
5155 /* We win if the charset inside the loop
5156 has no overlap with the one after the loop. */
5157 for (idx = 0;
5158 idx < (int) p2[1] && idx < (int) p1[4];
5159 idx++)
5160 if ((p2[2 + idx] & p1[5 + idx]) != 0)
5161 break;
5162
5163 if (idx == p2[1] || idx == p1[4])
5164 {
5165 p[-3] = (unsigned char) pop_failure_jump;
5166 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5167 }
5168 }
5169 }
5170 }
5171 p -= 2; /* Point at relative address again. */
5172 if ((re_opcode_t) p[-1] != pop_failure_jump)
5173 {
5174 p[-1] = (unsigned char) jump;
5175 DEBUG_PRINT1 (" Match => jump.\n");
5176 goto unconditional_jump;
5177 }
5178 /* Note fall through. */
5179
5180
5181 /* The end of a simple repeat has a pop_failure_jump back to
5182 its matching on_failure_jump, where the latter will push a
5183 failure point. The pop_failure_jump takes off failure
5184 points put on by this pop_failure_jump's matching
5185 on_failure_jump; we got through the pattern to here from the
5186 matching on_failure_jump, so didn't fail. */
5187 case pop_failure_jump:
5188 {
5189 /* We need to pass separate storage for the lowest and
5190 highest registers, even though we don't care about the
5191 actual values. Otherwise, we will restore only one
5192 register from the stack, since lowest will == highest in
5193 `pop_failure_point'. */
4cca6b86 5194 active_reg_t dummy_low_reg, dummy_high_reg;
2b83a2a4
RM
5195 unsigned char *pdummy;
5196 const char *sdummy;
5197
5198 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
5199 POP_FAILURE_POINT (sdummy, pdummy,
5200 dummy_low_reg, dummy_high_reg,
5201 reg_dummy, reg_dummy, reg_info_dummy);
5202 }
51702635 5203 /* Note fall through. */
2b83a2a4 5204
5929563f
UD
5205 unconditional_jump:
5206#ifdef _LIBC
5207 DEBUG_PRINT2 ("\n%p: ", p);
5208#else
5209 DEBUG_PRINT2 ("\n0x%x: ", p);
5210#endif
5211 /* Note fall through. */
91c7b85d 5212
2b83a2a4
RM
5213 /* Unconditionally jump (without popping any failure points). */
5214 case jump:
2b83a2a4
RM
5215 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
5216 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5217 p += mcnt; /* Do the jump. */
5929563f
UD
5218#ifdef _LIBC
5219 DEBUG_PRINT2 ("(to %p).\n", p);
5220#else
2b83a2a4 5221 DEBUG_PRINT2 ("(to 0x%x).\n", p);
5929563f 5222#endif
2b83a2a4
RM
5223 break;
5224
91c7b85d 5225
2b83a2a4
RM
5226 /* We need this opcode so we can detect where alternatives end
5227 in `group_match_null_string_p' et al. */
5228 case jump_past_alt:
5229 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
5230 goto unconditional_jump;
5231
5232
5233 /* Normally, the on_failure_jump pushes a failure point, which
5234 then gets popped at pop_failure_jump. We will end up at
5235 pop_failure_jump, also, and with a pattern of, say, `a+', we
5236 are skipping over the on_failure_jump, so we have to push
5237 something meaningless for pop_failure_jump to pop. */
5238 case dummy_failure_jump:
5239 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
5240 /* It doesn't matter what we push for the string here. What
5241 the code at `fail' tests is the value for the pattern. */
bca973bc 5242 PUSH_FAILURE_POINT (NULL, NULL, -2);
2b83a2a4
RM
5243 goto unconditional_jump;
5244
5245
5246 /* At the end of an alternative, we need to push a dummy failure
5247 point in case we are followed by a `pop_failure_jump', because
5248 we don't want the failure point for the alternative to be
5249 popped. For example, matching `(a|ab)*' against `aab'
5250 requires that we match the `ab' alternative. */
5251 case push_dummy_failure:
5252 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
5253 /* See comments just above at `dummy_failure_jump' about the
5254 two zeroes. */
bca973bc 5255 PUSH_FAILURE_POINT (NULL, NULL, -2);
2b83a2a4
RM
5256 break;
5257
5258 /* Have to succeed matching what follows at least n times.
5259 After that, handle like `on_failure_jump'. */
91c7b85d 5260 case succeed_n:
2b83a2a4
RM
5261 EXTRACT_NUMBER (mcnt, p + 2);
5262 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5263
5264 assert (mcnt >= 0);
5265 /* Originally, this is how many times we HAVE to succeed. */
5266 if (mcnt > 0)
5267 {
5268 mcnt--;
5269 p += 2;
5270 STORE_NUMBER_AND_INCR (p, mcnt);
5929563f
UD
5271#ifdef _LIBC
5272 DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt);
5273#else
5274 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt);
5275#endif
2b83a2a4
RM
5276 }
5277 else if (mcnt == 0)
5278 {
5929563f
UD
5279#ifdef _LIBC
5280 DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2);
5281#else
2b83a2a4 5282 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
5929563f 5283#endif
2b83a2a4
RM
5284 p[2] = (unsigned char) no_op;
5285 p[3] = (unsigned char) no_op;
5286 goto on_failure;
5287 }
5288 break;
91c7b85d
RM
5289
5290 case jump_n:
2b83a2a4
RM
5291 EXTRACT_NUMBER (mcnt, p + 2);
5292 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5293
5294 /* Originally, this is how many times we CAN jump. */
5295 if (mcnt)
5296 {
5297 mcnt--;
5298 STORE_NUMBER (p + 2, mcnt);
5929563f
UD
5299#ifdef _LIBC
5300 DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt);
5301#else
5302 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt);
5303#endif
91c7b85d 5304 goto unconditional_jump;
2b83a2a4
RM
5305 }
5306 /* If don't have to jump any more, skip over the rest of command. */
91c7b85d
RM
5307 else
5308 p += 4;
2b83a2a4 5309 break;
91c7b85d 5310
2b83a2a4
RM
5311 case set_number_at:
5312 {
5313 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5314
5315 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5316 p1 = p + mcnt;
5317 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5929563f
UD
5318#ifdef _LIBC
5319 DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
5320#else
2b83a2a4 5321 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
5929563f 5322#endif
2b83a2a4
RM
5323 STORE_NUMBER (p1, mcnt);
5324 break;
5325 }
5326
102800e0
RM
5327#if 0
5328 /* The DEC Alpha C compiler 3.x generates incorrect code for the
5329 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
5330 AT_WORD_BOUNDARY, so this code is disabled. Expanding the
5331 macro and introducing temporary variables works around the bug. */
5332
5333 case wordbound:
5334 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5335 if (AT_WORD_BOUNDARY (d))
2b83a2a4 5336 break;
102800e0 5337 goto fail;
2b83a2a4
RM
5338
5339 case notwordbound:
102800e0 5340 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
2b83a2a4
RM
5341 if (AT_WORD_BOUNDARY (d))
5342 goto fail;
102800e0
RM
5343 break;
5344#else
5345 case wordbound:
5346 {
5347 boolean prevchar, thischar;
5348
5349 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5350 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5351 break;
5352
5353 prevchar = WORDCHAR_P (d - 1);
5354 thischar = WORDCHAR_P (d);
5355 if (prevchar != thischar)
5356 break;
5357 goto fail;
5358 }
5359
5360 case notwordbound:
5361 {
5362 boolean prevchar, thischar;
5363
5364 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5365 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5366 goto fail;
5367
5368 prevchar = WORDCHAR_P (d - 1);
5369 thischar = WORDCHAR_P (d);
5370 if (prevchar != thischar)
5371 goto fail;
5372 break;
5373 }
5374#endif
2b83a2a4
RM
5375
5376 case wordbeg:
5377 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5378 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5379 break;
5380 goto fail;
5381
5382 case wordend:
5383 DEBUG_PRINT1 ("EXECUTING wordend.\n");
5384 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5385 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5386 break;
5387 goto fail;
5388
5389#ifdef emacs
5390 case before_dot:
5391 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5392 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5393 goto fail;
5394 break;
91c7b85d 5395
2b83a2a4
RM
5396 case at_dot:
5397 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5398 if (PTR_CHAR_POS ((unsigned char *) d) != point)
5399 goto fail;
5400 break;
91c7b85d 5401
2b83a2a4
RM
5402 case after_dot:
5403 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5404 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5405 goto fail;
5406 break;
2b83a2a4
RM
5407
5408 case syntaxspec:
5409 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5410 mcnt = *p++;
5411 goto matchsyntax;
5412
5413 case wordchar:
5414 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5415 mcnt = (int) Sword;
5416 matchsyntax:
5417 PREFETCH ();
5418 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
5419 d++;
5420 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5421 goto fail;
5422 SET_REGS_MATCHED ();
5423 break;
5424
5425 case notsyntaxspec:
5426 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5427 mcnt = *p++;
5428 goto matchnotsyntax;
5429
5430 case notwordchar:
5431 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5432 mcnt = (int) Sword;
5433 matchnotsyntax:
5434 PREFETCH ();
5435 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
5436 d++;
5437 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5438 goto fail;
5439 SET_REGS_MATCHED ();
5440 break;
5441
5442#else /* not emacs */
5443 case wordchar:
5444 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5445 PREFETCH ();
5446 if (!WORDCHAR_P (d))
5447 goto fail;
5448 SET_REGS_MATCHED ();
5449 d++;
5450 break;
91c7b85d 5451
2b83a2a4
RM
5452 case notwordchar:
5453 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5454 PREFETCH ();
5455 if (WORDCHAR_P (d))
5456 goto fail;
5457 SET_REGS_MATCHED ();
5458 d++;
5459 break;
5460#endif /* not emacs */
91c7b85d 5461
2b83a2a4
RM
5462 default:
5463 abort ();
5464 }
5465 continue; /* Successfully executed one pattern command; keep going. */
5466
5467
5468 /* We goto here if a matching operation fails. */
5469 fail:
5470 if (!FAIL_STACK_EMPTY ())
5471 { /* A restart point is known. Restore to that state. */
5472 DEBUG_PRINT1 ("\nFAIL:\n");
5473 POP_FAILURE_POINT (d, p,
5474 lowest_active_reg, highest_active_reg,
5475 regstart, regend, reg_info);
5476
5477 /* If this failure point is a dummy, try the next one. */
5478 if (!p)
5479 goto fail;
5480
5481 /* If we failed to the end of the pattern, don't examine *p. */
5482 assert (p <= pend);
5483 if (p < pend)
5484 {
5485 boolean is_a_jump_n = false;
91c7b85d 5486
2b83a2a4
RM
5487 /* If failed to a backwards jump that's part of a repetition
5488 loop, need to pop this failure point and use the next one. */
5489 switch ((re_opcode_t) *p)
5490 {
5491 case jump_n:
5492 is_a_jump_n = true;
5493 case maybe_pop_jump:
5494 case pop_failure_jump:
5495 case jump:
5496 p1 = p + 1;
5497 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
91c7b85d 5498 p1 += mcnt;
2b83a2a4
RM
5499
5500 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5501 || (!is_a_jump_n
5502 && (re_opcode_t) *p1 == on_failure_jump))
5503 goto fail;
5504 break;
5505 default:
5506 /* do nothing */ ;
5507 }
5508 }
5509
5510 if (d >= string1 && d <= end1)
5511 dend = end_match_1;
5512 }
5513 else
5514 break; /* Matching at this starting point really fails. */
5515 } /* for (;;) */
5516
5517 if (best_regs_set)
5518 goto restore_best_regs;
5519
5520 FREE_VARIABLES ();
5521
5522 return -1; /* Failure to match. */
5523} /* re_match_2 */
5524\f
5525/* Subroutine definitions for re_match_2. */
5526
5527
5528/* We are passed P pointing to a register number after a start_memory.
91c7b85d 5529
2b83a2a4
RM
5530 Return true if the pattern up to the corresponding stop_memory can
5531 match the empty string, and false otherwise.
91c7b85d 5532
2b83a2a4
RM
5533 If we find the matching stop_memory, sets P to point to one past its number.
5534 Otherwise, sets P to an undefined byte less than or equal to END.
5535
5536 We don't handle duplicates properly (yet). */
5537
5538static boolean
5539group_match_null_string_p (p, end, reg_info)
5540 unsigned char **p, *end;
5541 register_info_type *reg_info;
5542{
5543 int mcnt;
5544 /* Point to after the args to the start_memory. */
5545 unsigned char *p1 = *p + 2;
91c7b85d 5546
2b83a2a4
RM
5547 while (p1 < end)
5548 {
5549 /* Skip over opcodes that can match nothing, and return true or
5550 false, as appropriate, when we get to one that can't, or to the
5551 matching stop_memory. */
91c7b85d 5552
2b83a2a4
RM
5553 switch ((re_opcode_t) *p1)
5554 {
5555 /* Could be either a loop or a series of alternatives. */
5556 case on_failure_jump:
5557 p1++;
5558 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
91c7b85d 5559
2b83a2a4
RM
5560 /* If the next operation is not a jump backwards in the
5561 pattern. */
5562
5563 if (mcnt >= 0)
5564 {
5565 /* Go through the on_failure_jumps of the alternatives,
5566 seeing if any of the alternatives cannot match nothing.
5567 The last alternative starts with only a jump,
5568 whereas the rest start with on_failure_jump and end
5569 with a jump, e.g., here is the pattern for `a|b|c':
5570
5571 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5572 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
91c7b85d 5573 /exactn/1/c
2b83a2a4
RM
5574
5575 So, we have to first go through the first (n-1)
5576 alternatives and then deal with the last one separately. */
5577
5578
5579 /* Deal with the first (n-1) alternatives, which start
5580 with an on_failure_jump (see above) that jumps to right
5581 past a jump_past_alt. */
5582
5583 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5584 {
5585 /* `mcnt' holds how many bytes long the alternative
5586 is, including the ending `jump_past_alt' and
5587 its number. */
5588
91c7b85d 5589 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
2b83a2a4
RM
5590 reg_info))
5591 return false;
5592
5593 /* Move to right after this alternative, including the
5594 jump_past_alt. */
91c7b85d 5595 p1 += mcnt;
2b83a2a4
RM
5596
5597 /* Break if it's the beginning of an n-th alternative
5598 that doesn't begin with an on_failure_jump. */
5599 if ((re_opcode_t) *p1 != on_failure_jump)
5600 break;
91c7b85d 5601
2b83a2a4
RM
5602 /* Still have to check that it's not an n-th
5603 alternative that starts with an on_failure_jump. */
5604 p1++;
5605 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5606 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5607 {
5608 /* Get to the beginning of the n-th alternative. */
5609 p1 -= 3;
5610 break;
5611 }
5612 }
5613
5614 /* Deal with the last alternative: go back and get number
5615 of the `jump_past_alt' just before it. `mcnt' contains
5616 the length of the alternative. */
5617 EXTRACT_NUMBER (mcnt, p1 - 2);
5618
5619 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5620 return false;
5621
5622 p1 += mcnt; /* Get past the n-th alternative. */
5623 } /* if mcnt > 0 */
5624 break;
5625
91c7b85d 5626
2b83a2a4
RM
5627 case stop_memory:
5628 assert (p1[1] == **p);
5629 *p = p1 + 2;
5630 return true;
5631
91c7b85d
RM
5632
5633 default:
2b83a2a4
RM
5634 if (!common_op_match_null_string_p (&p1, end, reg_info))
5635 return false;
5636 }
5637 } /* while p1 < end */
5638
5639 return false;
5640} /* group_match_null_string_p */
5641
5642
5643/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5644 It expects P to be the first byte of a single alternative and END one
5645 byte past the last. The alternative can contain groups. */
91c7b85d 5646
2b83a2a4
RM
5647static boolean
5648alt_match_null_string_p (p, end, reg_info)
5649 unsigned char *p, *end;
5650 register_info_type *reg_info;
5651{
5652 int mcnt;
5653 unsigned char *p1 = p;
91c7b85d 5654
2b83a2a4
RM
5655 while (p1 < end)
5656 {
91c7b85d 5657 /* Skip over opcodes that can match nothing, and break when we get
2b83a2a4 5658 to one that can't. */
91c7b85d 5659
2b83a2a4
RM
5660 switch ((re_opcode_t) *p1)
5661 {
5662 /* It's a loop. */
5663 case on_failure_jump:
5664 p1++;
5665 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5666 p1 += mcnt;
5667 break;
91c7b85d
RM
5668
5669 default:
2b83a2a4
RM
5670 if (!common_op_match_null_string_p (&p1, end, reg_info))
5671 return false;
5672 }
5673 } /* while p1 < end */
5674
5675 return true;
5676} /* alt_match_null_string_p */
5677
5678
5679/* Deals with the ops common to group_match_null_string_p and
91c7b85d
RM
5680 alt_match_null_string_p.
5681
2b83a2a4
RM
5682 Sets P to one after the op and its arguments, if any. */
5683
5684static boolean
5685common_op_match_null_string_p (p, end, reg_info)
5686 unsigned char **p, *end;
5687 register_info_type *reg_info;
5688{
5689 int mcnt;
5690 boolean ret;
5691 int reg_no;
5692 unsigned char *p1 = *p;
5693
5694 switch ((re_opcode_t) *p1++)
5695 {
5696 case no_op:
5697 case begline:
5698 case endline:
5699 case begbuf:
5700 case endbuf:
5701 case wordbeg:
5702 case wordend:
5703 case wordbound:
5704 case notwordbound:
5705#ifdef emacs
5706 case before_dot:
5707 case at_dot:
5708 case after_dot:
5709#endif
5710 break;
5711
5712 case start_memory:
5713 reg_no = *p1;
5714 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5715 ret = group_match_null_string_p (&p1, end, reg_info);
91c7b85d 5716
2b83a2a4
RM
5717 /* Have to set this here in case we're checking a group which
5718 contains a group and a back reference to it. */
5719
5720 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5721 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5722
5723 if (!ret)
5724 return false;
5725 break;
91c7b85d 5726
2b83a2a4
RM
5727 /* If this is an optimized succeed_n for zero times, make the jump. */
5728 case jump:
5729 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5730 if (mcnt >= 0)
5731 p1 += mcnt;
5732 else
5733 return false;
5734 break;
5735
5736 case succeed_n:
5737 /* Get to the number of times to succeed. */
91c7b85d 5738 p1 += 2;
2b83a2a4
RM
5739 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5740
5741 if (mcnt == 0)
5742 {
5743 p1 -= 4;
5744 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5745 p1 += mcnt;
5746 }
5747 else
5748 return false;
5749 break;
5750
91c7b85d 5751 case duplicate:
2b83a2a4
RM
5752 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5753 return false;
5754 break;
5755
5756 case set_number_at:
5757 p1 += 4;
5758
5759 default:
5760 /* All other opcodes mean we cannot match the empty string. */
5761 return false;
5762 }
5763
5764 *p = p1;
5765 return true;
5766} /* common_op_match_null_string_p */
5767
5768
5769/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5770 bytes; nonzero otherwise. */
91c7b85d 5771
2b83a2a4
RM
5772static int
5773bcmp_translate (s1, s2, len, translate)
4cca6b86 5774 const char *s1, *s2;
2b83a2a4 5775 register int len;
03a75825 5776 RE_TRANSLATE_TYPE translate;
2b83a2a4 5777{
4cca6b86
UD
5778 register const unsigned char *p1 = (const unsigned char *) s1;
5779 register const unsigned char *p2 = (const unsigned char *) s2;
2b83a2a4
RM
5780 while (len)
5781 {
5782 if (translate[*p1++] != translate[*p2++]) return 1;
5783 len--;
5784 }
5785 return 0;
5786}
5787\f
5788/* Entry points for GNU code. */
5789
5790/* re_compile_pattern is the GNU regular expression compiler: it
5791 compiles PATTERN (of length SIZE) and puts the result in BUFP.
5792 Returns 0 if the pattern was valid, otherwise an error string.
91c7b85d 5793
2b83a2a4
RM
5794 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5795 are set in BUFP on entry.
91c7b85d 5796
2b83a2a4
RM
5797 We call regex_compile to do the actual compilation. */
5798
5799const char *
5800re_compile_pattern (pattern, length, bufp)
5801 const char *pattern;
4cca6b86 5802 size_t length;
2b83a2a4
RM
5803 struct re_pattern_buffer *bufp;
5804{
5805 reg_errcode_t ret;
91c7b85d 5806
2b83a2a4
RM
5807 /* GNU code is written to assume at least RE_NREGS registers will be set
5808 (and at least one extra will be -1). */
5809 bufp->regs_allocated = REGS_UNALLOCATED;
91c7b85d 5810
2b83a2a4
RM
5811 /* And GNU code determines whether or not to get register information
5812 by passing null for the REGS argument to re_match, etc., not by
5813 setting no_sub. */
5814 bufp->no_sub = 0;
91c7b85d 5815
2b83a2a4
RM
5816 /* Match anchors at newline. */
5817 bufp->newline_anchor = 1;
91c7b85d 5818
2b83a2a4
RM
5819 ret = regex_compile (pattern, length, re_syntax_options, bufp);
5820
5821 if (!ret)
5822 return NULL;
c4563d2d 5823 return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
91c7b85d 5824}
2ad4fab2
UD
5825#ifdef _LIBC
5826weak_alias (__re_compile_pattern, re_compile_pattern)
5827#endif
2b83a2a4
RM
5828\f
5829/* Entry points compatible with 4.2 BSD regex library. We don't define
5830 them unless specifically requested. */
5831
86187531 5832#if defined _REGEX_RE_COMP || defined _LIBC
2b83a2a4
RM
5833
5834/* BSD has one and only one pattern buffer. */
5835static struct re_pattern_buffer re_comp_buf;
5836
51702635
UD
5837char *
5838#ifdef _LIBC
5839/* Make these definitions weak in libc, so POSIX programs can redefine
5840 these names if they don't use our functions, and still use
5841 regcomp/regexec below without link errors. */
5842weak_function
5843#endif
2b83a2a4
RM
5844re_comp (s)
5845 const char *s;
5846{
5847 reg_errcode_t ret;
91c7b85d 5848
2b83a2a4
RM
5849 if (!s)
5850 {
5851 if (!re_comp_buf.buffer)
5852 return gettext ("No previous regular expression");
5853 return 0;
5854 }
5855
5856 if (!re_comp_buf.buffer)
5857 {
5858 re_comp_buf.buffer = (unsigned char *) malloc (200);
5859 if (re_comp_buf.buffer == NULL)
c4563d2d
UD
5860 return (char *) gettext (re_error_msgid
5861 + re_error_msgid_idx[(int) REG_ESPACE]);
2b83a2a4
RM
5862 re_comp_buf.allocated = 200;
5863
5864 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5865 if (re_comp_buf.fastmap == NULL)
c4563d2d
UD
5866 return (char *) gettext (re_error_msgid
5867 + re_error_msgid_idx[(int) REG_ESPACE]);
2b83a2a4
RM
5868 }
5869
5870 /* Since `re_exec' always passes NULL for the `regs' argument, we
5871 don't need to initialize the pattern buffer fields which affect it. */
5872
5873 /* Match anchors at newlines. */
5874 re_comp_buf.newline_anchor = 1;
5875
5876 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
91c7b85d 5877
2b83a2a4
RM
5878 if (!ret)
5879 return NULL;
5880
5881 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
c4563d2d 5882 return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
2b83a2a4
RM
5883}
5884
5885
51702635
UD
5886int
5887#ifdef _LIBC
5888weak_function
5889#endif
2b83a2a4
RM
5890re_exec (s)
5891 const char *s;
5892{
5893 const int len = strlen (s);
5894 return
5895 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5896}
dc997231 5897
2b83a2a4
RM
5898#endif /* _REGEX_RE_COMP */
5899\f
5900/* POSIX.2 functions. Don't define these for Emacs. */
5901
5902#ifndef emacs
5903
5904/* regcomp takes a regular expression as a string and compiles it.
5905
5906 PREG is a regex_t *. We do not expect any fields to be initialized,
5907 since POSIX says we shouldn't. Thus, we set
5908
5909 `buffer' to the compiled pattern;
5910 `used' to the length of the compiled pattern;
5911 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5912 REG_EXTENDED bit in CFLAGS is set; otherwise, to
5913 RE_SYNTAX_POSIX_BASIC;
5914 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
a5d1d726
UD
5915 `fastmap' to an allocated space for the fastmap;
5916 `fastmap_accurate' to zero;
2b83a2a4
RM
5917 `re_nsub' to the number of subexpressions in PATTERN.
5918
5919 PATTERN is the address of the pattern string.
5920
5921 CFLAGS is a series of bits which affect compilation.
5922
5923 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5924 use POSIX basic syntax.
5925
5926 If REG_NEWLINE is set, then . and [^...] don't match newline.
5927 Also, regexec will try a match beginning after every newline.
5928
5929 If REG_ICASE is set, then we considers upper- and lowercase
5930 versions of letters to be equivalent when matching.
5931
5932 If REG_NOSUB is set, then when PREG is passed to regexec, that
5933 routine will report only success or failure, and nothing about the
5934 registers.
5935
5936 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
5937 the return codes and their meanings.) */
5938
5939int
5940regcomp (preg, pattern, cflags)
5941 regex_t *preg;
91c7b85d 5942 const char *pattern;
2b83a2a4
RM
5943 int cflags;
5944{
5945 reg_errcode_t ret;
4cca6b86 5946 reg_syntax_t syntax
2b83a2a4
RM
5947 = (cflags & REG_EXTENDED) ?
5948 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5949
5950 /* regex_compile will allocate the space for the compiled pattern. */
5951 preg->buffer = 0;
5952 preg->allocated = 0;
5953 preg->used = 0;
91c7b85d 5954
a5d1d726
UD
5955 /* Try to allocate space for the fastmap. */
5956 preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
91c7b85d 5957
2b83a2a4
RM
5958 if (cflags & REG_ICASE)
5959 {
5960 unsigned i;
91c7b85d 5961
03a75825
RM
5962 preg->translate
5963 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5964 * sizeof (*(RE_TRANSLATE_TYPE)0));
2b83a2a4
RM
5965 if (preg->translate == NULL)
5966 return (int) REG_ESPACE;
5967
5968 /* Map uppercase characters to corresponding lowercase ones. */
5969 for (i = 0; i < CHAR_SET_SIZE; i++)
4caef86c 5970 preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
2b83a2a4
RM
5971 }
5972 else
5973 preg->translate = NULL;
5974
5975 /* If REG_NEWLINE is set, newlines are treated differently. */
5976 if (cflags & REG_NEWLINE)
5977 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
5978 syntax &= ~RE_DOT_NEWLINE;
5979 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5980 /* It also changes the matching behavior. */
5981 preg->newline_anchor = 1;
5982 }
5983 else
5984 preg->newline_anchor = 0;
5985
5986 preg->no_sub = !!(cflags & REG_NOSUB);
5987
91c7b85d 5988 /* POSIX says a null character in the pattern terminates it, so we
2b83a2a4
RM
5989 can use strlen here in compiling the pattern. */
5990 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
91c7b85d 5991
2b83a2a4
RM
5992 /* POSIX doesn't distinguish between an unmatched open-group and an
5993 unmatched close-group: both are REG_EPAREN. */
5994 if (ret == REG_ERPAREN) ret = REG_EPAREN;
91c7b85d 5995
a5d1d726
UD
5996 if (ret == REG_NOERROR && preg->fastmap)
5997 {
5998 /* Compute the fastmap now, since regexec cannot modify the pattern
5999 buffer. */
6000 if (re_compile_fastmap (preg) == -2)
6001 {
6002 /* Some error occured while computing the fastmap, just forget
6003 about it. */
6004 free (preg->fastmap);
6005 preg->fastmap = NULL;
6006 }
6007 }
6008
2b83a2a4
RM
6009 return (int) ret;
6010}
2ad4fab2
UD
6011#ifdef _LIBC
6012weak_alias (__regcomp, regcomp)
6013#endif
2b83a2a4
RM
6014
6015
6016/* regexec searches for a given pattern, specified by PREG, in the
6017 string STRING.
91c7b85d 6018
2b83a2a4
RM
6019 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6020 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
6021 least NMATCH elements, and we set them to the offsets of the
6022 corresponding matched substrings.
91c7b85d 6023
2b83a2a4
RM
6024 EFLAGS specifies `execution flags' which affect matching: if
6025 REG_NOTBOL is set, then ^ does not match at the beginning of the
6026 string; if REG_NOTEOL is set, then $ does not match at the end.
91c7b85d 6027
2b83a2a4
RM
6028 We return 0 if we find a match and REG_NOMATCH if not. */
6029
6030int
6031regexec (preg, string, nmatch, pmatch, eflags)
6032 const regex_t *preg;
91c7b85d
RM
6033 const char *string;
6034 size_t nmatch;
6035 regmatch_t pmatch[];
2b83a2a4
RM
6036 int eflags;
6037{
6038 int ret;
6039 struct re_registers regs;
6040 regex_t private_preg;
6041 int len = strlen (string);
6042 boolean want_reg_info = !preg->no_sub && nmatch > 0;
6043
6044 private_preg = *preg;
91c7b85d 6045
2b83a2a4
RM
6046 private_preg.not_bol = !!(eflags & REG_NOTBOL);
6047 private_preg.not_eol = !!(eflags & REG_NOTEOL);
91c7b85d 6048
2b83a2a4
RM
6049 /* The user has told us exactly how many registers to return
6050 information about, via `nmatch'. We have to pass that on to the
6051 matching routines. */
6052 private_preg.regs_allocated = REGS_FIXED;
91c7b85d 6053
2b83a2a4
RM
6054 if (want_reg_info)
6055 {
6056 regs.num_regs = nmatch;
a5d1d726
UD
6057 regs.start = TALLOC (nmatch * 2, regoff_t);
6058 if (regs.start == NULL)
2b83a2a4 6059 return (int) REG_NOMATCH;
a5d1d726 6060 regs.end = regs.start + nmatch;
2b83a2a4
RM
6061 }
6062
6063 /* Perform the searching operation. */
6064 ret = re_search (&private_preg, string, len,
6065 /* start: */ 0, /* range: */ len,
6066 want_reg_info ? &regs : (struct re_registers *) 0);
91c7b85d 6067
2b83a2a4
RM
6068 /* Copy the register information to the POSIX structure. */
6069 if (want_reg_info)
6070 {
6071 if (ret >= 0)
6072 {
6073 unsigned r;
6074
6075 for (r = 0; r < nmatch; r++)
6076 {
6077 pmatch[r].rm_so = regs.start[r];
6078 pmatch[r].rm_eo = regs.end[r];
6079 }
6080 }
6081
6082 /* If we needed the temporary register info, free the space now. */
6083 free (regs.start);
2b83a2a4
RM
6084 }
6085
6086 /* We want zero return to mean success, unlike `re_search'. */
6087 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
6088}
2ad4fab2
UD
6089#ifdef _LIBC
6090weak_alias (__regexec, regexec)
6091#endif
2b83a2a4
RM
6092
6093
6094/* Returns a message corresponding to an error code, ERRCODE, returned
6095 from either regcomp or regexec. We don't use PREG here. */
6096
6097size_t
7cabd57c 6098regerror (errcode, preg, errbuf, errbuf_size)
2b83a2a4
RM
6099 int errcode;
6100 const regex_t *preg;
6101 char *errbuf;
6102 size_t errbuf_size;
6103{
6104 const char *msg;
6105 size_t msg_size;
6106
6107 if (errcode < 0
c4563d2d
UD
6108 || errcode >= (int) (sizeof (re_error_msgid_idx)
6109 / sizeof (re_error_msgid_idx[0])))
91c7b85d 6110 /* Only error codes returned by the rest of the code should be passed
2b83a2a4
RM
6111 to this routine. If we are given anything else, or if other regex
6112 code generates an invalid error code, then the program has a bug.
6113 Dump core so we can fix it. */
6114 abort ();
6115
c4563d2d 6116 msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
2b83a2a4
RM
6117
6118 msg_size = strlen (msg) + 1; /* Includes the null. */
91c7b85d 6119
2b83a2a4
RM
6120 if (errbuf_size != 0)
6121 {
6122 if (msg_size > errbuf_size)
6123 {
86187531
UD
6124#if defined HAVE_MEMPCPY || defined _LIBC
6125 *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
6126#else
6127 memcpy (errbuf, msg, errbuf_size - 1);
2b83a2a4 6128 errbuf[errbuf_size - 1] = 0;
86187531 6129#endif
2b83a2a4
RM
6130 }
6131 else
86187531 6132 memcpy (errbuf, msg, msg_size);
2b83a2a4
RM
6133 }
6134
6135 return msg_size;
6136}
2ad4fab2
UD
6137#ifdef _LIBC
6138weak_alias (__regerror, regerror)
6139#endif
2b83a2a4
RM
6140
6141
6142/* Free dynamically allocated space used by PREG. */
6143
6144void
6145regfree (preg)
6146 regex_t *preg;
6147{
6148 if (preg->buffer != NULL)
6149 free (preg->buffer);
6150 preg->buffer = NULL;
91c7b85d 6151
2b83a2a4
RM
6152 preg->allocated = 0;
6153 preg->used = 0;
6154
6155 if (preg->fastmap != NULL)
6156 free (preg->fastmap);
6157 preg->fastmap = NULL;
6158 preg->fastmap_accurate = 0;
6159
6160 if (preg->translate != NULL)
6161 free (preg->translate);
6162 preg->translate = NULL;
6163}
2ad4fab2
UD
6164#ifdef _LIBC
6165weak_alias (__regfree, regfree)
6166#endif
2b83a2a4
RM
6167
6168#endif /* not emacs */
This page took 0.81319 seconds and 5 git commands to generate.