]> sourceware.org Git - glibc.git/blob - posix/fnmatch_loop.c
(FCT): Only declare len if
[glibc.git] / posix / fnmatch_loop.c
1 /* Copyright (C) 1991-1993, 1996-1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public
15 License along with this library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
18
19 /* Match STRING against the filename pattern PATTERN, returning zero if
20 it matches, nonzero if not. */
21 static int FCT (const CHAR *pattern, const CHAR *string,
22 int no_leading_period, int flags) internal_function;
23
24 static int
25 internal_function
26 FCT (pattern, string, no_leading_period, flags)
27 const CHAR *pattern;
28 const CHAR *string;
29 int no_leading_period;
30 int flags;
31 {
32 register const CHAR *p = pattern, *n = string;
33 register UCHAR c;
34 #ifdef _LIBC
35 const UCHAR *collseq = (const UCHAR *)
36 _NL_CURRENT(LC_COLLATE, CONCAT(_NL_COLLATE_COLLSEQ,SUFFIX));
37 # ifdef WIDE_CHAR_VERSION
38 const wint_t *names = (const wint_t *)
39 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES);
40 size_t size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE);
41 size_t layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS);
42 # endif
43 #endif
44
45 while ((c = *p++) != L('\0'))
46 {
47 c = FOLD (c);
48
49 switch (c)
50 {
51 case L('?'):
52 if (*n == L('\0'))
53 return FNM_NOMATCH;
54 else if (*n == L('/') && (flags & FNM_FILE_NAME))
55 return FNM_NOMATCH;
56 else if (*n == L('.') && no_leading_period
57 && (n == string
58 || (n[-1] == L('/') && (flags & FNM_FILE_NAME))))
59 return FNM_NOMATCH;
60 break;
61
62 case L('\\'):
63 if (!(flags & FNM_NOESCAPE))
64 {
65 c = *p++;
66 if (c == L('\0'))
67 /* Trailing \ loses. */
68 return FNM_NOMATCH;
69 c = FOLD (c);
70 }
71 if (FOLD ((UCHAR) *n) != c)
72 return FNM_NOMATCH;
73 break;
74
75 case L('*'):
76 if (*n == L('.') && no_leading_period
77 && (n == string
78 || (n[-1] == L('/') && (flags & FNM_FILE_NAME))))
79 return FNM_NOMATCH;
80
81 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
82 {
83 if (*n == L('/') && (flags & FNM_FILE_NAME))
84 /* A slash does not match a wildcard under FNM_FILE_NAME. */
85 return FNM_NOMATCH;
86 else if (c == L('?'))
87 {
88 /* A ? needs to match one character. */
89 if (*n == L('\0'))
90 /* There isn't another character; no match. */
91 return FNM_NOMATCH;
92 else
93 /* One character of the string is consumed in matching
94 this ? wildcard, so *??? won't match if there are
95 less than three characters. */
96 ++n;
97 }
98 }
99
100 if (c == L('\0'))
101 /* The wildcard(s) is/are the last element of the pattern.
102 If the name is a file name and contains another slash
103 this does mean it cannot match. If the FNM_LEADING_DIR
104 flag is set and exactly one slash is following, we have
105 a match. */
106 {
107 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
108
109 if (flags & FNM_FILE_NAME)
110 {
111 const CHAR *slashp = STRCHR (n, L('/'));
112
113 if (flags & FNM_LEADING_DIR)
114 {
115 if (slashp != NULL
116 && STRCHR (slashp + 1, L('/')) == NULL)
117 result = 0;
118 }
119 else
120 {
121 if (slashp == NULL)
122 result = 0;
123 }
124 }
125
126 return result;
127 }
128 else
129 {
130 const CHAR *endp;
131
132 endp = STRCHRNUL (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'));
133
134 if (c == L('['))
135 {
136 int flags2 = ((flags & FNM_FILE_NAME)
137 ? flags : (flags & ~FNM_PERIOD));
138
139 for (--p; n < endp; ++n)
140 if (FCT (p, n, (no_leading_period
141 && (n == string
142 || (n[-1] == L('/')
143 && (flags & FNM_FILE_NAME)))),
144 flags2) == 0)
145 return 0;
146 }
147 else if (c == L('/') && (flags & FNM_FILE_NAME))
148 {
149 while (*n != L('\0') && *n != L('/'))
150 ++n;
151 if (*n == L('/')
152 && (FCT (p, n + 1, flags & FNM_PERIOD, flags) == 0))
153 return 0;
154 }
155 else
156 {
157 int flags2 = ((flags & FNM_FILE_NAME)
158 ? flags : (flags & ~FNM_PERIOD));
159
160 if (c == L('\\') && !(flags & FNM_NOESCAPE))
161 c = *p;
162 c = FOLD (c);
163 for (--p; n < endp; ++n)
164 if (FOLD ((UCHAR) *n) == c
165 && (FCT (p, n, (no_leading_period
166 && (n == string
167 || (n[-1] == L('/')
168 && (flags & FNM_FILE_NAME)))),
169 flags2) == 0))
170 return 0;
171 }
172 }
173
174 /* If we come here no match is possible with the wildcard. */
175 return FNM_NOMATCH;
176
177 case L('['):
178 {
179 static int posixly_correct;
180 /* Nonzero if the sense of the character class is inverted. */
181 register int not;
182 CHAR cold;
183
184 if (posixly_correct == 0)
185 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
186
187 if (*n == L('\0'))
188 return FNM_NOMATCH;
189
190 if (*n == L('.') && no_leading_period
191 && (n == string
192 || (n[-1] == L('/') && (flags & FNM_FILE_NAME))))
193 return FNM_NOMATCH;
194
195 if (*n == L('/') && (flags & FNM_FILE_NAME))
196 /* `/' cannot be matched. */
197 return FNM_NOMATCH;
198
199 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
200 if (not)
201 ++p;
202
203 c = *p++;
204 for (;;)
205 {
206 UCHAR fn = FOLD ((UCHAR) *n);
207
208 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
209 {
210 if (*p == L('\0'))
211 return FNM_NOMATCH;
212 c = FOLD ((UCHAR) *p);
213 ++p;
214
215 if (c == fn)
216 goto matched;
217 }
218 else if (c == L('[') && *p == L(':'))
219 {
220 /* Leave room for the null. */
221 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
222 size_t c1 = 0;
223 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
224 wctype_t wt;
225 #endif
226 const CHAR *startp = p;
227
228 for (;;)
229 {
230 if (c1 == CHAR_CLASS_MAX_LENGTH)
231 /* The name is too long and therefore the pattern
232 is ill-formed. */
233 return FNM_NOMATCH;
234
235 c = *++p;
236 if (c == L(':') && p[1] == L(']'))
237 {
238 p += 2;
239 break;
240 }
241 if (c < L('a') || c >= L('z'))
242 {
243 /* This cannot possibly be a character class name.
244 Match it as a normal range. */
245 p = startp;
246 c = L('[');
247 goto normal_bracket;
248 }
249 str[c1++] = c;
250 }
251 str[c1] = L('\0');
252
253 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
254 wt = IS_CHAR_CLASS (str);
255 if (wt == 0)
256 /* Invalid character class name. */
257 return FNM_NOMATCH;
258
259 /* The following code is glibc specific but does
260 there a good job in sppeding up the code since
261 we can avoid the btowc() call. The
262 IS_CHAR_CLASS call will return a bit mask for
263 the 32-bit table. We have to convert it to a
264 bitmask for the __ctype_b table. This has to
265 be done based on the byteorder as can be seen
266 below. In any case we will fall back on the
267 code using btowc() if the class is not one of
268 the standard classes. */
269 # if defined _LIBC && ! WIDE_CHAR_VERSION
270 # if __BYTE_ORDER == __LITTLE_ENDIAN
271 if ((wt & 0xf0ffff) == 0)
272 {
273 wt >>= 16;
274 if ((__ctype_b[(UCHAR) *n] & wt) != 0)
275 goto matched;
276 }
277 # else
278 if (wt <= 0x800)
279 {
280 if ((__ctype_b[(UCHAR) *n] & wt) != 0)
281 goto matched;
282 }
283 # endif
284 else
285 # endif
286 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
287 goto matched;
288 #else
289 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
290 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
291 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
292 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
293 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
294 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
295 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
296 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
297 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
298 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
299 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
300 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
301 goto matched;
302 #endif
303 c = *p++;
304 }
305 #ifdef _LIBC
306 else if (c == L('[') && *p == L('='))
307 {
308 UCHAR str[1];
309 uint32_t nrules =
310 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
311 const CHAR *startp = p;
312
313 c = *++p;
314 if (c == L('\0'))
315 {
316 p = startp;
317 c = L('[');
318 goto normal_bracket;
319 }
320 str[0] = c;
321
322 c = *++p;
323 if (c != L('=') || p[1] != L(']'))
324 {
325 p = startp;
326 c = L('[');
327 goto normal_bracket;
328 }
329 p += 2;
330
331 if (nrules == 0)
332 {
333 if ((UCHAR) *n == str[0])
334 goto matched;
335 }
336 else
337 {
338 const int32_t *table;
339 # if WIDE_CHAR_VERSION
340 const int32_t *weights;
341 const int32_t *extra;
342 # else
343 const unsigned char *weights;
344 const unsigned char *extra;
345 # endif
346 const int32_t *indirect;
347 int32_t idx;
348 const UCHAR *cp = (const UCHAR *) str;
349
350 /* This #include defines a local function! */
351 # if WIDE_CHAR_VERSION
352 # include <locale/weightwc.h>
353 # else
354 # include <locale/weight.h>
355 # endif
356
357 # if WIDE_CHAR_VERSION
358 table = (const int32_t *)
359 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
360 weights = (const int32_t *)
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
362 extra = (const int32_t *)
363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
364 indirect = (const int32_t *)
365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
366 # else
367 table = (const int32_t *)
368 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
369 weights = (const unsigned char *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
371 extra = (const unsigned char *)
372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
373 indirect = (const int32_t *)
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
375 # endif
376
377 idx = findidx (&cp);
378 if (idx != 0)
379 {
380 /* We found a table entry. Now see whether the
381 character we are currently at has the same
382 equivalance class value. */
383 # if !WIDE_CHAR_VERSION
384 int len = weights[idx];
385 # endif
386 int32_t idx2;
387 const UCHAR *np = (const UCHAR *) n;
388
389 idx2 = findidx (&np);
390 # if !WIDE_CHAR_VERSION
391 if (idx2 != 0 && len == weights[idx2])
392 {
393 int cnt = 0;
394
395 while (cnt < len
396 && (weights[idx + 1 + cnt]
397 == weights[idx2 + 1 + cnt]))
398 ++cnt;
399
400 if (cnt == len)
401 goto matched;
402 }
403 # else
404 if (idx2 != 0 && weights[idx] == weights[idx2])
405 goto matched;
406 # endif
407 }
408 }
409
410 c = *p++;
411 }
412 #endif
413 else if (c == L('\0'))
414 /* [ (unterminated) loses. */
415 return FNM_NOMATCH;
416 else
417 {
418 c = FOLD (c);
419 normal_bracket:
420 if (c == fn)
421 goto matched;
422
423 cold = c;
424 c = *p++;
425
426 if (c == L('-') && *p != L(']'))
427 {
428 #if _LIBC
429 /* We have to find the collation sequence
430 value for C. Collation sequence is nothing
431 we can regularly access. The sequence
432 value is defined by the order in which the
433 definitions of the collation values for the
434 various characters appear in the source
435 file. A strange concept, nowhere
436 documented. */
437 int32_t fseqidx;
438 int32_t lseqidx;
439 UCHAR cend = *p++;
440 # ifdef WIDE_CHAR_VERSION
441 size_t cnt;
442 # endif
443
444 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
445 cend = *p++;
446 if (cend == L('\0'))
447 return FNM_NOMATCH;
448
449 # ifdef WIDE_CHAR_VERSION
450 /* Search in the `names' array for the characters. */
451 fseqidx = fn % size;
452 cnt = 0;
453 while (names[fseqidx] != fn)
454 {
455 if (++cnt == layers)
456 /* XXX We don't know anything about
457 the character we are supposed to
458 match. This means we are failing. */
459 goto range_not_matched;
460
461 fseqidx += size;
462 }
463 lseqidx = cold % size;
464 cnt = 0;
465 while (names[lseqidx] != cold)
466 {
467 if (++cnt == layers)
468 {
469 lseqidx = -1;
470 break;
471 }
472 lseqidx += size;
473 }
474 # else
475 fseqidx = fn;
476 lseqidx = cold;
477 # endif
478
479 /* XXX It is not entirely clear to me how to handle
480 characters which are not mentioned in the
481 collation specification. */
482 if (
483 # ifdef WIDE_CHAR_VERSION
484 lseqidx == -1 ||
485 # endif
486 collseq[lseqidx] <= collseq[fseqidx])
487 {
488 /* We have to look at the upper bound. */
489 int32_t hseqidx;
490
491 cend = FOLD (cend);
492 # ifdef WIDE_CHAR_VERSION
493 hseqidx = cend % size;
494 cnt = 0;
495 while (names[hseqidx] != cend)
496 {
497 if (++cnt == layers)
498 {
499 /* Hum, no information about the upper
500 bound. The matching succeeds if the
501 lower bound is matched exactly. */
502 if (lseqidx == -1 || cold != fn)
503 goto range_not_matched;
504
505 goto matched;
506 }
507 }
508 # else
509 hseqidx = cend;
510 # endif
511
512 if (
513 # ifdef WIDE_CHAR_VERSION
514 (lseqidx == -1
515 && collseq[fseqidx] == collseq[hseqidx]) ||
516 # endif
517 collseq[fseqidx] <= collseq[hseqidx])
518 goto matched;
519 }
520 # ifdef WIDE_CHAR_VERSION
521 range_not_matched:
522 # endif
523 #else
524 /* We use a boring value comparison of the character
525 values. This is better than comparing using
526 `strcoll' since the latter would have surprising
527 and sometimes fatal consequences. */
528 UCHAR cend = *p++;
529
530 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
531 cend = *p++;
532 if (cend == L('\0'))
533 return FNM_NOMATCH;
534
535 /* It is a range. */
536 if (cold <= fc && fc <= c)
537 goto matched;
538 #endif
539
540 c = *p++;
541 }
542 }
543
544 if (c == L(']'))
545 break;
546 }
547
548 if (!not)
549 return FNM_NOMATCH;
550 break;
551
552 matched:
553 /* Skip the rest of the [...] that already matched. */
554 do
555 {
556 c = *p++;
557
558 if (c == L('\0'))
559 /* [... (unterminated) loses. */
560 return FNM_NOMATCH;
561
562 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
563 {
564 if (*p == L('\0'))
565 return FNM_NOMATCH;
566 /* XXX 1003.2d11 is unclear if this is right. */
567 ++p;
568 }
569 else if (c == L('[') && *p == L(':'))
570 {
571 do
572 if (*++p == L('\0'))
573 return FNM_NOMATCH;
574 while (*p != L(':') || p[1] == L(']'));
575 p += 2;
576 c = *p;
577 }
578 }
579 while (c != L(']'));
580 if (not)
581 return FNM_NOMATCH;
582 }
583 break;
584
585 default:
586 if (c != FOLD ((UCHAR) *n))
587 return FNM_NOMATCH;
588 }
589
590 ++n;
591 }
592
593 if (*n == '\0')
594 return 0;
595
596 if ((flags & FNM_LEADING_DIR) && *n == L('/'))
597 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
598 return 0;
599
600 return FNM_NOMATCH;
601 }
602
/* Drop the macros this file was parameterised with; presumably the
   including file defines them afresh before including it again.  */

/* Character types and the function name.  */
#undef CHAR
#undef UCHAR
#undef FCT

/* Character and string helpers.  */
#undef BTOWC
#undef FOLD
#undef L
#undef STRCHR
#undef STRCHRNUL
#undef STRCOLL
#undef SUFFIX
This page took 0.067111 seconds and 5 git commands to generate.