]> sourceware.org Git - glibc.git/blob - posix/fnmatch_loop.c
Update.
[glibc.git] / posix / fnmatch_loop.c
1 /* Copyright (C) 1991-1993, 1996-1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public
15 License along with this library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
18
19 /* Match STRING against the filename pattern PATTERN, returning zero if
20 it matches, nonzero if not. */
21 static int FCT (const CHAR *pattern, const CHAR *string,
22 int no_leading_period, int flags) internal_function;
23
24 static int
25 internal_function
26 FCT (pattern, string, no_leading_period, flags)
27 const CHAR *pattern;
28 const CHAR *string;
29 int no_leading_period;
30 int flags;
31 {
32 register const CHAR *p = pattern, *n = string;
33 register UCHAR c;
34 #ifdef _LIBC
35 const UCHAR *collseq = (const UCHAR *)
36 _NL_CURRENT(LC_COLLATE, CONCAT(_NL_COLLATE_COLLSEQ,SUFFIX));
37 # ifdef WIDE_CHAR_VERSION
38 const wint_t *names = (const wint_t *)
39 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES);
40 size_t size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE);
41 size_t layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS);
42 # endif
43 #endif
44
45 while ((c = *p++) != L('\0'))
46 {
47 c = FOLD (c);
48
49 switch (c)
50 {
51 case L('?'):
52 if (*n == L('\0'))
53 return FNM_NOMATCH;
54 else if (*n == L('/') && (flags & FNM_FILE_NAME))
55 return FNM_NOMATCH;
56 else if (*n == L('.') && no_leading_period
57 && (n == string
58 || (n[-1] == L('/') && (flags & FNM_FILE_NAME))))
59 return FNM_NOMATCH;
60 break;
61
62 case L('\\'):
63 if (!(flags & FNM_NOESCAPE))
64 {
65 c = *p++;
66 if (c == L('\0'))
67 /* Trailing \ loses. */
68 return FNM_NOMATCH;
69 c = FOLD (c);
70 }
71 if (FOLD ((UCHAR) *n) != c)
72 return FNM_NOMATCH;
73 break;
74
75 case L('*'):
76 if (*n == L('.') && no_leading_period
77 && (n == string
78 || (n[-1] == L('/') && (flags & FNM_FILE_NAME))))
79 return FNM_NOMATCH;
80
81 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
82 {
83 if (*n == L('/') && (flags & FNM_FILE_NAME))
84 /* A slash does not match a wildcard under FNM_FILE_NAME. */
85 return FNM_NOMATCH;
86 else if (c == L('?'))
87 {
88 /* A ? needs to match one character. */
89 if (*n == L('\0'))
90 /* There isn't another character; no match. */
91 return FNM_NOMATCH;
92 else
93 /* One character of the string is consumed in matching
94 this ? wildcard, so *??? won't match if there are
95 less than three characters. */
96 ++n;
97 }
98 }
99
100 if (c == L('\0'))
101 /* The wildcard(s) is/are the last element of the pattern.
102 If the name is a file name and contains another slash
103 this does mean it cannot match. If the FNM_LEADING_DIR
104 flag is set and exactly one slash is following, we have
105 a match. */
106 {
107 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
108
109 if (flags & FNM_FILE_NAME)
110 {
111 const CHAR *slashp = STRCHR (n, L('/'));
112
113 if (flags & FNM_LEADING_DIR)
114 {
115 if (slashp != NULL
116 && STRCHR (slashp + 1, L('/')) == NULL)
117 result = 0;
118 }
119 else
120 {
121 if (slashp == NULL)
122 result = 0;
123 }
124 }
125
126 return result;
127 }
128 else
129 {
130 const CHAR *endp;
131
132 endp = STRCHRNUL (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'));
133
134 if (c == L('['))
135 {
136 int flags2 = ((flags & FNM_FILE_NAME)
137 ? flags : (flags & ~FNM_PERIOD));
138
139 for (--p; n < endp; ++n)
140 if (FCT (p, n, (no_leading_period
141 && (n == string
142 || (n[-1] == L('/')
143 && (flags & FNM_FILE_NAME)))),
144 flags2) == 0)
145 return 0;
146 }
147 else if (c == L('/') && (flags & FNM_FILE_NAME))
148 {
149 while (*n != L('\0') && *n != L('/'))
150 ++n;
151 if (*n == L('/')
152 && (FCT (p, n + 1, flags & FNM_PERIOD, flags) == 0))
153 return 0;
154 }
155 else
156 {
157 int flags2 = ((flags & FNM_FILE_NAME)
158 ? flags : (flags & ~FNM_PERIOD));
159
160 if (c == L('\\') && !(flags & FNM_NOESCAPE))
161 c = *p;
162 c = FOLD (c);
163 for (--p; n < endp; ++n)
164 if (FOLD ((UCHAR) *n) == c
165 && (FCT (p, n, (no_leading_period
166 && (n == string
167 || (n[-1] == L('/')
168 && (flags & FNM_FILE_NAME)))),
169 flags2) == 0))
170 return 0;
171 }
172 }
173
174 /* If we come here no match is possible with the wildcard. */
175 return FNM_NOMATCH;
176
177 case L('['):
178 {
179 static int posixly_correct;
180 /* Nonzero if the sense of the character class is inverted. */
181 register int not;
182 CHAR cold;
183
184 if (posixly_correct == 0)
185 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
186
187 if (*n == L('\0'))
188 return FNM_NOMATCH;
189
190 if (*n == L('.') && no_leading_period
191 && (n == string
192 || (n[-1] == L('/') && (flags & FNM_FILE_NAME))))
193 return FNM_NOMATCH;
194
195 if (*n == L('/') && (flags & FNM_FILE_NAME))
196 /* `/' cannot be matched. */
197 return FNM_NOMATCH;
198
199 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
200 if (not)
201 ++p;
202
203 c = *p++;
204 for (;;)
205 {
206 UCHAR fn = FOLD ((UCHAR) *n);
207
208 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
209 {
210 if (*p == L('\0'))
211 return FNM_NOMATCH;
212 c = FOLD ((UCHAR) *p);
213 ++p;
214
215 if (c == fn)
216 goto matched;
217 }
218 else if (c == L('[') && *p == L(':'))
219 {
220 /* Leave room for the null. */
221 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
222 size_t c1 = 0;
223 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
224 wctype_t wt;
225 #endif
226 const CHAR *startp = p;
227
228 for (;;)
229 {
230 if (c1 == CHAR_CLASS_MAX_LENGTH)
231 /* The name is too long and therefore the pattern
232 is ill-formed. */
233 return FNM_NOMATCH;
234
235 c = *++p;
236 if (c == L(':') && p[1] == L(']'))
237 {
238 p += 2;
239 break;
240 }
241 if (c < L('a') || c >= L('z'))
242 {
243 /* This cannot possibly be a character class name.
244 Match it as a normal range. */
245 p = startp;
246 c = L('[');
247 goto normal_bracket;
248 }
249 str[c1++] = c;
250 }
251 str[c1] = L('\0');
252
253 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
254 wt = IS_CHAR_CLASS (str);
255 if (wt == 0)
256 /* Invalid character class name. */
257 return FNM_NOMATCH;
258
259 /* The following code is glibc specific but does
260 there a good job in sppeding up the code since
261 we can avoid the btowc() call. The
262 IS_CHAR_CLASS call will return a bit mask for
263 the 32-bit table. We have to convert it to a
264 bitmask for the __ctype_b table. This has to
265 be done based on the byteorder as can be seen
266 below. In any case we will fall back on the
267 code using btowc() if the class is not one of
268 the standard classes. */
269 # if defined _LIBC && ! WIDE_CHAR_VERSION
270 # if __BYTE_ORDER == __LITTLE_ENDIAN
271 if ((wt & 0xf0ffff) == 0)
272 {
273 wt >>= 16;
274 if ((__ctype_b[(UCHAR) *n] & wt) != 0)
275 goto matched;
276 }
277 # else
278 if (wt <= 0x800)
279 {
280 if ((__ctype_b[(UCHAR) *n] & wt) != 0)
281 goto matched;
282 }
283 # endif
284 else
285 # endif
286 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
287 goto matched;
288 #else
289 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
290 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
291 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
292 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
293 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
294 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
295 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
296 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
297 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
298 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
299 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
300 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
301 goto matched;
302 #endif
303 c = *p++;
304 }
305 #ifdef _LIBC
306 else if (c == L('[') && *p == L('='))
307 {
308 UCHAR str[1];
309 uint32_t nrules =
310 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
311 const CHAR *startp = p;
312
313 c = *++p;
314 if (c == L('\0'))
315 {
316 p = startp;
317 c = L('[');
318 goto normal_bracket;
319 }
320 str[0] = c;
321
322 c = *++p;
323 if (c != L('=') || p[1] != L(']'))
324 {
325 p = startp;
326 c = L('[');
327 goto normal_bracket;
328 }
329 p += 2;
330
331 if (nrules == 0)
332 {
333 if ((UCHAR) *n == str[0])
334 goto matched;
335 }
336 else
337 {
338 const int32_t *table;
339 # if WIDE_CHAR_VERSION
340 const int32_t *weights;
341 const int32_t *extra;
342 # else
343 const unsigned char *weights;
344 const unsigned char *extra;
345 # endif
346 const int32_t *indirect;
347 int32_t idx;
348 const UCHAR *cp = (const UCHAR *) str;
349
350 /* This #include defines a local function! */
351 # if WIDE_CHAR_VERSION
352 # include <locale/weightwc.h>
353 # else
354 # include <locale/weight.h>
355 # endif
356
357 # if WIDE_CHAR_VERSION
358 table = (const int32_t *)
359 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
360 weights = (const int32_t *)
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
362 extra = (const int32_t *)
363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
364 indirect = (const int32_t *)
365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
366 # else
367 table = (const int32_t *)
368 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
369 weights = (const unsigned char *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
371 extra = (const unsigned char *)
372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
373 indirect = (const int32_t *)
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
375 # endif
376
377 idx = findidx (&cp);
378 if (idx != 0)
379 {
380 /* We found a table entry. Now see whether the
381 character we are currently at has the same
382 equivalance class value. */
383 int len = weights[idx];
384 int32_t idx2;
385 const UCHAR *np = (const UCHAR *) n;
386
387 idx2 = findidx (&np);
388 # if !WIDE_CHAR_VERSION
389 if (idx2 != 0 && len == weights[idx2])
390 {
391 int cnt = 0;
392
393 while (cnt < len
394 && (weights[idx + 1 + cnt]
395 == weights[idx2 + 1 + cnt]))
396 ++cnt;
397
398 if (cnt == len)
399 goto matched;
400 }
401 # else
402 if (idx2 != 0 && weights[idx] == weights[idx2])
403 goto matched;
404 # endif
405 }
406 }
407
408 c = *p++;
409 }
410 #endif
411 else if (c == L('\0'))
412 /* [ (unterminated) loses. */
413 return FNM_NOMATCH;
414 else
415 {
416 c = FOLD (c);
417 normal_bracket:
418 if (c == fn)
419 goto matched;
420
421 cold = c;
422 c = *p++;
423
424 if (c == L('-') && *p != L(']'))
425 {
426 #if _LIBC
427 /* We have to find the collation sequence
428 value for C. Collation sequence is nothing
429 we can regularly access. The sequence
430 value is defined by the order in which the
431 definitions of the collation values for the
432 various characters appear in the source
433 file. A strange concept, nowhere
434 documented. */
435 int32_t fseqidx;
436 int32_t lseqidx;
437 UCHAR cend = *p++;
438 # ifdef WIDE_CHAR_VERSION
439 size_t cnt;
440 # endif
441
442 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
443 cend = *p++;
444 if (cend == L('\0'))
445 return FNM_NOMATCH;
446
447 # ifdef WIDE_CHAR_VERSION
448 /* Search in the `names' array for the characters. */
449 fseqidx = fn % size;
450 cnt = 0;
451 while (names[fseqidx] != fn)
452 {
453 if (++cnt == layers)
454 /* XXX We don't know anything about
455 the character we are supposed to
456 match. This means we are failing. */
457 goto range_not_matched;
458
459 fseqidx += size;
460 }
461 lseqidx = cold % size;
462 cnt = 0;
463 while (names[lseqidx] != cold)
464 {
465 if (++cnt == layers)
466 {
467 lseqidx = -1;
468 break;
469 }
470 lseqidx += size;
471 }
472 # else
473 fseqidx = fn;
474 lseqidx = cold;
475 # endif
476
477 /* XXX It is not entirely clear to me how to handle
478 characters which are not mentioned in the
479 collation specification. */
480 if (
481 # ifdef WIDE_CHAR_VERSION
482 lseqidx == -1 ||
483 # endif
484 collseq[lseqidx] <= collseq[fseqidx])
485 {
486 /* We have to look at the upper bound. */
487 int32_t hseqidx;
488
489 cend = FOLD (cend);
490 # ifdef WIDE_CHAR_VERSION
491 hseqidx = cend % size;
492 cnt = 0;
493 while (names[hseqidx] != cend)
494 {
495 if (++cnt == layers)
496 {
497 /* Hum, no information about the upper
498 bound. The matching succeeds if the
499 lower bound is matched exactly. */
500 if (lseqidx == -1 || cold != fn)
501 goto range_not_matched;
502
503 goto matched;
504 }
505 }
506 # else
507 hseqidx = cend;
508 # endif
509
510 if (
511 # ifdef WIDE_CHAR_VERSION
512 (lseqidx == -1
513 && collseq[fseqidx] == collseq[hseqidx]) ||
514 # endif
515 collseq[fseqidx] <= collseq[hseqidx])
516 goto matched;
517 }
518 # ifdef WIDE_CHAR_VERSION
519 range_not_matched:
520 # endif
521 #else
522 /* We use a boring value comparison of the character
523 values. This is better than comparing using
524 `strcoll' since the latter would have surprising
525 and sometimes fatal consequences. */
526 UCHAR cend = *p++;
527
528 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
529 cend = *p++;
530 if (cend == L('\0'))
531 return FNM_NOMATCH;
532
533 /* It is a range. */
534 if (cold <= fc && fc <= c)
535 goto matched;
536 #endif
537
538 c = *p++;
539 }
540 }
541
542 if (c == L(']'))
543 break;
544 }
545
546 if (!not)
547 return FNM_NOMATCH;
548 break;
549
550 matched:
551 /* Skip the rest of the [...] that already matched. */
552 do
553 {
554 c = *p++;
555
556 if (c == L('\0'))
557 /* [... (unterminated) loses. */
558 return FNM_NOMATCH;
559
560 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
561 {
562 if (*p == L('\0'))
563 return FNM_NOMATCH;
564 /* XXX 1003.2d11 is unclear if this is right. */
565 ++p;
566 }
567 else if (c == L('[') && *p == L(':'))
568 {
569 do
570 if (*++p == L('\0'))
571 return FNM_NOMATCH;
572 while (*p != L(':') || p[1] == L(']'));
573 p += 2;
574 c = *p;
575 }
576 }
577 while (c != L(']'));
578 if (not)
579 return FNM_NOMATCH;
580 }
581 break;
582
583 default:
584 if (c != FOLD ((UCHAR) *n))
585 return FNM_NOMATCH;
586 }
587
588 ++n;
589 }
590
591 if (*n == '\0')
592 return 0;
593
594 if ((flags & FNM_LEADING_DIR) && *n == L('/'))
595 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
596 return 0;
597
598 return FNM_NOMATCH;
599 }
600
/* Drop the template parameter macros so that this file can be included
   again with a different set of definitions.  */
#undef BTOWC
#undef CHAR
#undef FCT
#undef FOLD
#undef L
#undef STRCHR
#undef STRCHRNUL
#undef STRCOLL
#undef SUFFIX
#undef UCHAR
This page took 0.071024 seconds and 6 git commands to generate.