From d0b96fc49b1535d820fe9680746677e55b8e83db Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Mon, 6 Oct 2003 23:44:15 +0000 Subject: Update. * posix/bug-regex11.c: Add some more tests which fail so far. Disable them. Patch by Paolo Bonzini . 2003-10-05 Paolo Bonzini * posix/bug-regex11.c: Add more backreference-related test cases. (main): Show the failing regex in the error messages. * posix/regexec.c (check_dst_limits_calc_pos): Simplify some nested conditionals. Replace if's with a switch statement. (check_dst_limits_calc_pos ): Rename parameter NODE to FROM_NODE, it shadows a local variable; don't recurse if FROM_NODE does not change in the recursive invocation, fixing an infinite loop in the ()\1*\1* regex. (sift_states_backward): Fix function comment. * posix/regcomp.c (calc_epsdest): Add an assertion. 2003-10-06 Ulrich Drepper --- posix/bug-regex11.c | 21 +++++++++--- posix/regexec.c | 92 +++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 82 insertions(+), 31 deletions(-) (limited to 'posix') diff --git a/posix/bug-regex11.c b/posix/bug-regex11.c index 40fd7c27f5..c7a8b6537a 100644 --- a/posix/bug-regex11.c +++ b/posix/bug-regex11.c @@ -35,6 +35,7 @@ struct /* Test for newline handling in regex. */ { "[^~]*~", "\nx~y", 0, 2, { { 0, 3 }, { -1, -1 } } }, /* Other tests. */ + { "a(.*)b", "a b", REG_EXTENDED, 2, { { 0, 3 }, { 1, 2 } } }, { ".*|\\([KIO]\\)\\([^|]*\\).*|?[KIO]", "10~.~|P|K0|I10|O16|?KSb", 0, 3, { { 0, 21 }, { 15, 16 }, { 16, 18 } } }, { ".*|\\([KIO]\\)\\([^|]*\\).*|?\\1", "10~.~|P|K0|I10|O16|?KSb", 0, 3, @@ -52,6 +53,18 @@ struct /* Here ^ cannot be treated as an anchor according to POSIX. */ { "(^|foo)bar", "(^|foo)bar", 0, 2, { { 0, 10 }, { -1, -1 } } }, { "(foo|^)bar", "(foo|^)bar", 0, 2, { { 0, 10 }, { -1, -1 } } }, + /* More tests on backreferences. */ + { "()\\1*\\1*", "", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } }, + { "([0-9]).*\\1(a*)", "7;7a6", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } }, + { "([0-9]).*\\1(a*)", "7;7a", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } }, +#if 0 + /* XXX This test seems wrong. --drepper */ + { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 1, 2 } } }, + + /* XXX Not used since they fail so far. */ + { "(b())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 2 }, { 0, 1 }, { 1, 1 } } }, + { "(bb())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 4 }, { 0, 2 }, { 2, 2 } } }, +#endif }; int @@ -71,14 +84,14 @@ main (void) { char buf[500]; regerror (n, &re, buf, sizeof (buf)); - printf ("regcomp %zd failed: %s\n", i, buf); + printf ("%s: regcomp %zd failed: %s\n", tests[i].pattern, i, buf); ret = 1; continue; } if (regexec (&re, tests[i].string, tests[i].nmatch, rm, 0)) { - printf ("regexec %zd failed\n", i); + printf ("%s: regexec %zd failed\n", tests[i].pattern, i); ret = 1; regfree (&re); continue; @@ -90,8 +103,8 @@ main (void) { if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1) break; - printf ("regexec match failure rm[%d] %d..%d\n", - n, rm[n].rm_so, rm[n].rm_eo); + printf ("%s: regexec %zd match failure rm[%d] %d..%d\n", + tests[i].pattern, i, n, rm[n].rm_so, rm[n].rm_eo); ret = 1; break; } diff --git a/posix/regexec.c b/posix/regexec.c index 39a27d2fed..3cd8beaba1 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -1387,7 +1387,7 @@ update_regs (dfa, pmatch, cur_node, cur_idx, nmatch) away the node `a'. ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is throwed away, we throw away the node `a'. - 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b': + 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the node `a'. ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away, @@ -1724,60 +1724,98 @@ check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx) } static int -check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node, +check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node, str_idx) re_dfa_t *dfa; re_match_context_t *mctx; re_node_set *eclosures; - int limit, subexp_idx, node, str_idx; + int limit, subexp_idx, from_node, str_idx; { struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; - int pos = (str_idx < lim->subexp_from ? -1 - : (lim->subexp_to < str_idx ? 1 : 0)); - if (pos == 0 - && (str_idx == lim->subexp_from || str_idx == lim->subexp_to)) - { int node_idx; + + /* If we are outside the range of the subexpression, return -1 or 1. */ + if (str_idx < lim->subexp_from) + return -1; + + if (lim->subexp_to < str_idx) + return 1; + + /* If we are within the subexpression, return 0. */ + if (str_idx != lim->subexp_from && str_idx != lim->subexp_to) + return 0; + + /* Else, we are on the boundary: examine the nodes on the epsilon + closure. */ for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) { int node = eclosures->elems[node_idx]; - re_token_type_t type= dfa->nodes[node].type; - if (type == OP_BACK_REF) + switch (dfa->nodes[node].type) + { + case OP_BACK_REF: { int bi = search_cur_bkref_entry (mctx, str_idx); for (; bi < mctx->nbkref_ents; ++bi) { struct re_backref_cache_entry *ent = mctx->bkref_ents + bi; + int dst, cpos; + + /* If this backreference goes beyond the point we're + examining, don't go any further. */ if (ent->str_idx > str_idx) break; - if (ent->node == node && ent->subexp_from == ent->subexp_to) - { - int cpos, dst; + + if (ent->node != node || ent->subexp_from != ent->subexp_to) + continue; + + /* Recurse trying to reach the OP_OPEN_SUBEXP and + OP_CLOSE_SUBEXP cases below. But, if the + destination node is the same node as the source + node, don't recurse because it would cause an + infinite loop: a regex that exhibits this behavior + is ()\1*\1* */ dst = dfa->edests[node].elems[0]; + if (dst == from_node) + { + if (str_idx == lim->subexp_from) + return -1; + else /* if (str_idx == lim->subexp_to) */ + return 0; + } + cpos = check_dst_limits_calc_pos (dfa, mctx, limit, dfa->eclosures + dst, subexp_idx, dst, str_idx); - if ((str_idx == lim->subexp_from && cpos == -1) - || (str_idx == lim->subexp_to && cpos == 0)) - return cpos; - } - } + + if (cpos == -1 && str_idx == lim->subexp_from) + return -1; + + if (cpos == 0 /* && str_idx == lim->lim->subexp_to */) + return 0; } - if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx - && str_idx == lim->subexp_from) - { - pos = -1; break; } - if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx - && str_idx == lim->subexp_to) + + case OP_OPEN_SUBEXP: + if (str_idx == lim->subexp_from && subexp_idx == dfa->nodes[node].opr.idx) + return -1; + break; + + case OP_CLOSE_SUBEXP: + if (str_idx == lim->subexp_to && subexp_idx == dfa->nodes[node].opr.idx) + return 0; + break; + + default: break; } - if (node_idx == eclosures->nelem && str_idx == lim->subexp_to) - pos = 1; } - return pos; + + if (str_idx == lim->subexp_to) + return 1; + else + return 0; } /* Check the limitations of sub expressions LIMITS, and remove the nodes -- cgit v1.2.3