From 963d8d782fc98fb6dc3a66f0068795f9920c269d Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Thu, 27 Jan 2005 19:08:10 +0000 Subject: [BZ #558] Update. 2005-01-27 Paolo Bonzini [BZ #558] * posix/regcomp.c (calc_inveclosure): Return reg_errcode_t. Initialize the node sets in dfa->inveclosures. (analyze): Initialize inveclosures only if it is needed. Check errors from calc_inveclosure. * posix/regex_internal.c (re_dfa_add_node): Do not initialize the inveclosure node set. * posix/regexec.c (re_search_internal): If nmatch includes unused subexpressions, reset them to { rm_so: -1, rm_eo: -1 } here. * posix/regcomp.c (parse_bracket_exp) [!RE_ENABLE_I18N]: Do build a SIMPLE_BRACKET token. * posix/regexec.c (transit_state_mb): Do not examine nodes where ACCEPT_MB is not set. --- ChangeLog | 18 ++++++++++++++++++ posix/regcomp.c | 40 +++++++++++++++++++++++++++++----------- posix/regex_internal.c | 9 ++------- posix/regexec.c | 36 +++++++++++++++++++++--------------- 4 files changed, 70 insertions(+), 33 deletions(-) diff --git a/ChangeLog b/ChangeLog index 059ed87f01..98be25bf3c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +2005-01-27 Paolo Bonzini + + [BZ #558] + * posix/regcomp.c (calc_inveclosure): Return reg_errcode_t. + Initialize the node sets in dfa->inveclosures. + (analyze): Initialize inveclosures only if it is needed. + Check errors from calc_inveclosure. + * posix/regex_internal.c (re_dfa_add_node): Do not initialize + the inveclosure node set. + * posix/regexec.c (re_search_internal): If nmatch includes unused + subexpressions, reset them to { rm_so: -1, rm_eo: -1 } here. + + * posix/regcomp.c (parse_bracket_exp) [!RE_ENABLE_I18N]: + Do build a SIMPLE_BRACKET token. + + * posix/regexec.c (transit_state_mb): Do not examine nodes + where ACCEPT_MB is not set. + 2005-01-27 Jakub Jelinek * stdlib/tst-fmtmsg.c: Include stdlib.h. diff --git a/posix/regcomp.c b/posix/regcomp.c index cf759690cf..1a5f7952c3 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -58,7 +58,7 @@ static int search_duplicated_node (re_dfa_t *dfa, int org_node, static reg_errcode_t calc_eclosure (re_dfa_t *dfa); static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root); -static void calc_inveclosure (re_dfa_t *dfa); +static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); static int fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax); static void fetch_token (re_token_t *result, re_string_t *input, @@ -1132,9 +1132,8 @@ analyze (preg) dfa->org_indices = re_malloc (int, dfa->nodes_alloc); dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); - dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc); if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL - || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0)) + || dfa->eclosures == NULL, 0)) return REG_ESPACE; dfa->subexp_map = re_malloc (int, preg->re_nsub); @@ -1167,7 +1166,18 @@ analyze (preg) ret = calc_eclosure (dfa); if (BE (ret != REG_NOERROR, 0)) return ret; - calc_inveclosure (dfa); + + /* We only need this during the prune_impossible_nodes pass in regexec.c; + skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ + if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) + || dfa->nbackref) + { + dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); + if (BE (dfa->inveclosures == NULL, 0)) + return REG_ESPACE; + ret = calc_inveclosure (dfa); + } + return ret; } @@ -1597,19 +1607,26 @@ duplicate_node (new_idx, dfa, org_idx, constraint) return REG_NOERROR; } -static void +static reg_errcode_t calc_inveclosure (dfa) re_dfa_t *dfa; { - int src, idx, dest; + int src, idx, ret; + for (idx = 0; idx < dfa->nodes_len; ++idx) + re_node_set_init_empty (dfa->inveclosures + idx); + for (src = 0; src < dfa->nodes_len; ++src) { + int *elems = dfa->eclosures[src].elems; for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) { - dest = dfa->eclosures[src].elems[idx]; - re_node_set_insert_last (dfa->inveclosures + dest, src); + ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); + if (BE (ret == -1, 0)) + return REG_ESPACE; } } + + return REG_NOERROR; } /* Calculate "eclosure" for all the node in DFA. */ @@ -3304,17 +3321,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) } } else +#endif /* not RE_ENABLE_I18N */ { +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* Build a tree for simple bracket. */ br_token.type = SIMPLE_BRACKET; br_token.opr.sbcset = sbcset; work_tree = create_token_tree (dfa, NULL, NULL, &br_token); if (BE (work_tree == NULL, 0)) goto parse_bracket_exp_espace; - - free_charset (mbcset); } -#endif /* not RE_ENABLE_I18N */ return work_tree; parse_bracket_exp_espace: diff --git a/posix/regex_internal.c b/posix/regex_internal.c index dbd3f24ca3..c3295a851c 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -1339,7 +1339,7 @@ re_dfa_add_node (dfa, token) { int new_nodes_alloc = dfa->nodes_alloc * 2; int *new_nexts, *new_indices; - re_node_set *new_edests, *new_eclosures, *new_inveclosures; + re_node_set *new_edests, *new_eclosures; re_token_t *new_array = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); @@ -1350,17 +1350,13 @@ re_dfa_add_node (dfa, token) new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); - new_inveclosures = re_realloc (dfa->inveclosures, re_node_set, - new_nodes_alloc); if (BE (new_nexts == NULL || new_indices == NULL - || new_edests == NULL || new_eclosures == NULL - || new_inveclosures == NULL, 0)) + || new_edests == NULL || new_eclosures == NULL, 0)) return -1; dfa->nexts = new_nexts; dfa->org_indices = new_indices; dfa->edests = new_edests; dfa->eclosures = new_eclosures; - dfa->inveclosures = new_inveclosures; dfa->nodes_alloc = new_nodes_alloc; } dfa->nodes[dfa->nodes_len] = token; @@ -1372,7 +1368,6 @@ re_dfa_add_node (dfa, token) dfa->nexts[dfa->nodes_len] = -1; re_node_set_init_empty (dfa->edests + dfa->nodes_len); re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); - re_node_set_init_empty (dfa->inveclosures + dfa->nodes_len); return dfa->nodes_len++; } diff --git a/posix/regexec.c b/posix/regexec.c index 3dc1398806..636396e6f7 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -605,6 +605,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, re_dfa_t *dfa = (re_dfa_t *)preg->buffer; int left_lim, right_lim, incr; int fl_longest_match, match_first, match_kind, match_last = -1; + int extra_nmatch; int sb, ch; #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) re_match_context_t mctx = { .dfa = dfa }; @@ -620,6 +621,9 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, mctx.dfa = dfa; #endif + extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; + nmatch -= extra_nmatch; + /* Check if the DFA haven't been compiled. */ if (BE (preg->used == 0 || dfa->init_state == NULL || dfa->init_state_word == NULL || dfa->init_state_nl == NULL @@ -882,11 +886,14 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, pmatch[reg_idx].rm_so += match_first; pmatch[reg_idx].rm_eo += match_first; } + for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) + { + pmatch[nmatch + reg_idx].rm_so = -1; + pmatch[nmatch + reg_idx].rm_eo = -1; + } if (dfa->subexp_map) - for (reg_idx = 0; - reg_idx + 1 < nmatch && reg_idx < preg->re_nsub; - reg_idx++) + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) if (dfa->subexp_map[reg_idx] != reg_idx) { pmatch[reg_idx + 1].rm_so @@ -1371,7 +1378,7 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) int fl_backtrack; { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - int idx, cur_node, real_nmatch; + int idx, cur_node; re_node_set eps_via_nodes; struct re_fail_stack_t *fs; struct re_fail_stack_t fs_body = { 0, 2, NULL }; @@ -1392,15 +1399,14 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) fs = NULL; cur_node = dfa->init_node; - real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1; re_node_set_init_empty (&eps_via_nodes); - prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * real_nmatch); - memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * real_nmatch); + prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * nmatch); + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) { - update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, real_nmatch); + update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) { @@ -2469,10 +2475,13 @@ transit_state_mb (mctx, pstate) { re_node_set dest_nodes, *new_nodes; int cur_node_idx = pstate->nodes.elems[i]; - int naccepted = 0, dest_idx; + int naccepted, dest_idx; unsigned int context; re_dfastate_t *dest_state; + if (!dfa->nodes[cur_node_idx].accept_mb) + continue; + if (dfa->nodes[cur_node_idx].constraint) { context = re_string_context_at (&mctx->input, @@ -2484,9 +2493,8 @@ transit_state_mb (mctx, pstate) } /* How many bytes the node can accept? */ - if (dfa->nodes[cur_node_idx].accept_mb) - naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, - re_string_cur_idx (&mctx->input)); + naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, + re_string_cur_idx (&mctx->input)); if (naccepted == 0) continue; @@ -2500,9 +2508,7 @@ transit_state_mb (mctx, pstate) #ifdef DEBUG assert (dfa->nexts[cur_node_idx] != -1); #endif - /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE, - then we use pstate->nodes.elems[i] instead. */ - new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]]; + new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; dest_state = mctx->state_log[dest_idx]; if (dest_state == NULL) -- cgit v1.2.3