This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.
Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hi! Just small optimizations, the speedup is not big but they also decrease regex.os code size. E.g. no DFA state can be has_backref it dfa->nbackref == 0, so there is no need to do too many checks in the fast path unless there are back references. Also, this patch adds a few new palindrome tests to bug-regex11. 2003-11-28 Jakub Jelinek <jakub@redhat.com> * posix/regexec.c (acquire_init_state_context): Make inline. Add always_inline attribute. (check_matching): Add BE macro. Move if (cur_state->has_backref) into if (dfa->nbackref). (sift_states_backward): Fix comment. (transit_state): Add BE macro. Move if (next_state->has_backref) into if (dfa->nbackref && next_state). Don't check for next_state != NULL twice. * posix/regcomp.c (peek_token): Use opr.ctx_type instead of opr.idx for ANCHOR. (parse_expression): Only call init_word_char if word context will be needed. * posix/bug-regex11.c (tests): Add new tests. * posix/tst-regex.c: Include getopt.h. (timing): New variable. (main): Set timing to 1 if --timing argument is present. Add 2 new tests. (run_test, run_test_backwards): Handle timing. --- libc/posix/regexec.c.jj 2003-11-27 17:36:46.000000000 +0100 +++ libc/posix/regexec.c 2003-11-28 23:24:41.000000000 +0100 @@ -50,10 +50,9 @@ static int re_search_stub (struct re_pat int ret_len); static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs, int regs_allocated); -static re_dfastate_t *acquire_init_state_context (reg_errcode_t *err, - const regex_t *preg, - const re_match_context_t *mctx, - int idx); +static inline re_dfastate_t *acquire_init_state_context + (reg_errcode_t *err, const regex_t *preg, const re_match_context_t *mctx, + int idx) __attribute ((always_inline)); static reg_errcode_t prune_impossible_nodes (const regex_t *preg, re_match_context_t *mctx); static int check_matching (const regex_t *preg, re_match_context_t *mctx, @@ -921,7 +920,7 @@ prune_impossible_nodes (preg, mctx) We must select appropriate initial state depending on the context, since initial states may have constraints like "\<", "^", etc.. */ -static re_dfastate_t * +static inline re_dfastate_t * acquire_init_state_context (err, preg, mctx, idx) reg_errcode_t *err; const regex_t *preg; @@ -988,22 +987,22 @@ check_matching (preg, mctx, fl_longest_m /* Check OP_OPEN_SUBEXP in the initial state in case that we use them later. E.g. Processing back references. */ - if (dfa->nbackref) + if (BE (dfa->nbackref, 0)) { err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0); if (BE (err != REG_NOERROR, 0)) return err; - } - if (cur_state->has_backref) - { - err = transit_state_bkref (preg, &cur_state->nodes, mctx); - if (BE (err != REG_NOERROR, 0)) - return err; + if (cur_state->has_backref) + { + err = transit_state_bkref (preg, &cur_state->nodes, mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + } } /* If the RE accepts NULL string. */ - if (cur_state->halt) + if (BE (cur_state->halt, 0)) { if (!cur_state->has_constraint || check_halt_state_context (preg, cur_state, mctx, cur_str_idx)) @@ -1384,11 +1383,11 @@ update_regs (dfa, pmatch, cur_node, cur_ i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw away the node `a'. ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is - throwed away, we throw away the node `a'. + thrown away, we throw away the node `a'. 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the node `a'. - ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away, + ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, we throw away the node `a'. */ #define STATE_NODE_CONTAINS(state,node) \ @@ -2053,7 +2052,7 @@ sift_states_iter_mb (preg, mctx, sctx, n !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], dfa->nexts[node_idx])) /* The node can't accept the `multi byte', or the - destination was already throwed away, then the node + destination was already thrown away, then the node could't accept the current input `multi byte'. */ naccepted = 0; /* Otherwise, it is sure that the node could accept @@ -2200,24 +2199,24 @@ transit_state (err, preg, mctx, state) } } - /* Check OP_OPEN_SUBEXP in the current state in case that we use them - later. We must check them here, since the back references in the - next state might use them. */ - if (dfa->nbackref && next_state/* && fl_process_bkref */) + if (BE (dfa->nbackref, 0) && next_state != NULL) { + /* Check OP_OPEN_SUBEXP in the current state in case that we use them + later. We must check them here, since the back references in the + next state might use them. */ *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes, cur_idx); if (BE (*err != REG_NOERROR, 0)) return NULL; - } - /* If the next state has back references. */ - if (next_state != NULL && next_state->has_backref) - { - *err = transit_state_bkref (preg, &next_state->nodes, mctx); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - next_state = mctx->state_log[cur_idx]; + /* If the next state has back references. */ + if (next_state->has_backref) + { + *err = transit_state_bkref (preg, &next_state->nodes, mctx); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + next_state = mctx->state_log[cur_idx]; + } } return next_state; } --- libc/posix/regcomp.c.jj 2003-11-27 14:22:56.000000000 +0100 +++ libc/posix/regcomp.c 2003-11-27 22:32:27.000000000 +0100 @@ -1711,28 +1711,28 @@ peek_token (token, input, syntax) if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; - token->opr.idx = WORD_FIRST; + token->opr.ctx_type = WORD_FIRST; } break; case '>': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; - token->opr.idx = WORD_LAST; + token->opr.ctx_type = WORD_LAST; } break; case 'b': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; - token->opr.idx = WORD_DELIM; + token->opr.ctx_type = WORD_DELIM; } break; case 'B': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; - token->opr.idx = INSIDE_WORD; + token->opr.ctx_type = INSIDE_WORD; } break; case 'w': @@ -1755,14 +1755,14 @@ peek_token (token, input, syntax) if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; - token->opr.idx = BUF_FIRST; + token->opr.ctx_type = BUF_FIRST; } break; case '\'': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; - token->opr.idx = BUF_LAST; + token->opr.ctx_type = BUF_LAST; } break; case '(': @@ -1858,7 +1858,7 @@ peek_token (token, input, syntax) break; } token->type = ANCHOR; - token->opr.idx = LINE_FIRST; + token->opr.ctx_type = LINE_FIRST; break; case '$': if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && @@ -1872,7 +1872,7 @@ peek_token (token, input, syntax) break; } token->type = ANCHOR; - token->opr.idx = LINE_LAST; + token->opr.ctx_type = LINE_LAST; break; default: break; @@ -2217,7 +2217,9 @@ parse_expression (regexp, preg, token, s } break; case ANCHOR: - if (dfa->word_char == NULL) + if ((token->opr.ctx_type + & (WORD_DELIM | INSIDE_WORD | WORD_FIRST | WORD_LAST)) + && dfa->word_char == NULL) { *err = init_word_char (dfa); if (BE (*err != REG_NOERROR, 0)) --- libc/posix/bug-regex11.c.jj 2003-11-26 15:46:18.000000000 +0100 +++ libc/posix/bug-regex11.c 2003-11-28 21:16:39.000000000 +0100 @@ -71,10 +71,18 @@ struct { "(bb())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 4 }, { 0, 2 }, { 2, 2 } } }, { "^(.?)(.?)(.?)(.?)(.?).?\\5\\4\\3\\2\\1$", "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } }, + { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$", + "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } }, + { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$", + "abcdedcba", REG_EXTENDED, 1, { { 0, 9 } } }, #if 0 /* XXX Not used since they fail so far. */ + { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$", + "ababababa", REG_EXTENDED, 1, { { 0, 9 } } }, { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$", "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } }, + { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$", + "ababababa", REG_EXTENDED, 1, { { 0, 9 } } }, #endif }; --- libc/posix/tst-regex.c.jj 2003-11-12 21:33:35.000000000 +0100 +++ libc/posix/tst-regex.c 2003-11-28 22:57:53.000000000 +0100 @@ -23,6 +23,7 @@ #include <errno.h> #include <error.h> #include <fcntl.h> +#include <getopt.h> #include <iconv.h> #include <locale.h> #include <mcheck.h> @@ -45,6 +46,7 @@ static char *mem; static char *umem; static size_t memlen; static size_t umemlen; +static int timing; static int test_expr (const char *expr, int expected, int expectedicase); static int run_test (const char *expr, const char *mem, size_t memlen, @@ -54,7 +56,7 @@ static int run_test_backwards (const cha int -main (void) +main (int argc, char *argv[]) { const char *file; int fd; @@ -64,9 +66,16 @@ main (void) char *outmem; size_t inlen; size_t outlen; + static const struct option options[] = + { + {"timing",no_argument, &timing, 1 }, + {NULL, 0, NULL, 0 } + }; mtrace (); + while (getopt_long (argc, argv, "", options, NULL) >= 0); + /* Make the content of the file available in memory. */ file = "../ChangeLog.8"; fd = open (file, O_RDONLY); @@ -125,6 +134,8 @@ main (void) result |= test_expr ("G.\\{1\\}ran", 2, 3); result |= test_expr ("G.*ran", 3, 44); result |= test_expr ("[äáàâ]", 0, 0); + result |= test_expr ("Uddeborg", 2, 2); + result |= test_expr (".Uddeborg", 2, 2); /* Free the resources. */ free (umem); @@ -201,7 +212,7 @@ run_test (const char *expr, const char * int cnt; #ifdef _POSIX_CPUTIME - if (use_clock) + if (use_clock && !timing) use_clock = clock_gettime (cl, &start) == 0; #endif @@ -250,7 +261,7 @@ run_test (const char *expr, const char * regfree (&re); #ifdef _POSIX_CPUTIME - if (use_clock) + if (use_clock && !timing) { use_clock = clock_gettime (cl, &finish) == 0; if (use_clock) @@ -270,6 +281,58 @@ run_test (const char *expr, const char * finish.tv_sec, finish.tv_nsec); } } + + if (use_clock && timing) + { + struct timespec mintime = { .tv_sec = 24 * 60 * 60 }; + + for (int i = 0; i < 10; ++i) + { + offset = 0; + use_clock = clock_gettime (cl, &start) == 0; + + if (!use_clock) + continue; + + err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0)); + if (err != REG_NOERROR) + continue; + + while (offset < memlen) + { + regmatch_t ma[1]; + + err = regexec (&re, mem + offset, 1, ma, 0); + if (err != REG_NOERROR) + break; + + offset += ma[0].rm_eo; + } + + regfree (&re); + + use_clock = clock_gettime (cl, &finish) == 0; + if (use_clock) + { + if (finish.tv_nsec < start.tv_nsec) + { + finish.tv_nsec -= start.tv_nsec - 1000000000; + finish.tv_sec -= 1 + start.tv_sec; + } + else + { + finish.tv_nsec -= start.tv_nsec; + finish.tv_sec -= start.tv_sec; + } + if (finish.tv_sec < mintime.tv_sec + || (finish.tv_sec == mintime.tv_sec + && finish.tv_nsec < mintime.tv_nsec)) + mintime = finish; + } + } + printf ("elapsed time: %ld.%09ld sec\n", + mintime.tv_sec, mintime.tv_nsec); + } #endif /* Return an error if the number of matches found is not match we @@ -292,7 +355,7 @@ run_test_backwards (const char *expr, co int cnt; #ifdef _POSIX_CPUTIME - if (use_clock) + if (use_clock && !timing) use_clock = clock_gettime (cl, &start) == 0; #endif @@ -344,7 +407,7 @@ run_test_backwards (const char *expr, co regfree (&re); #ifdef _POSIX_CPUTIME - if (use_clock) + if (use_clock && !timing) { use_clock = clock_gettime (cl, &finish) == 0; if (use_clock) @@ -364,6 +427,74 @@ run_test_backwards (const char *expr, co finish.tv_sec, finish.tv_nsec); } } + + if (use_clock && timing) + { + struct timespec mintime = { .tv_sec = 24 * 60 * 60 }; + + for (int i = 0; i < 10; ++i) + { + offset = memlen; + use_clock = clock_gettime (cl, &start) == 0; + + if (!use_clock) + continue; + + memset (&re, 0, sizeof (re)); + re.fastmap = malloc (256); + if (re.fastmap == NULL) + continue; + + err = re_compile_pattern (expr, strlen (expr), &re); + if (err != NULL) + continue; + + if (re_compile_fastmap (&re)) + { + regfree (&re); + continue; + } + + while (offset <= memlen) + { + int start; + const char *sp; + + start = re_search (&re, mem, memlen, offset, -offset, NULL); + if (start < -1) + break; + + sp = mem + start; + while (sp > mem && sp[-1] != '\n') + --sp; + + offset = sp - 1 - mem; + } + + regfree (&re); + + use_clock = clock_gettime (cl, &finish) == 0; + if (use_clock) + { + if (finish.tv_nsec < start.tv_nsec) + { + finish.tv_nsec -= start.tv_nsec - 1000000000; + finish.tv_sec -= 1 + start.tv_sec; + } + else + { + finish.tv_nsec -= start.tv_nsec; + finish.tv_sec -= start.tv_sec; + } + if (finish.tv_sec < mintime.tv_sec + || (finish.tv_sec == mintime.tv_sec + && finish.tv_nsec < mintime.tv_nsec)) + mintime = finish; + } + } + printf ("elapsed time: %ld.%09ld sec\n", + mintime.tv_sec, mintime.tv_nsec); + } #endif /* Return an error if the number of matches found is not match we Jakub
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |