From ad7f28c29d06ddb4506d0d75e089732740b5bd2b Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 18 Nov 2003 23:40:59 +0000 Subject: Update. * posix/regex_internal.h (re_token_type_t): Remove unused ALT, END_OF_RE_TOKEN_T and SUBEXP. Reorder values. Add OP_UTF8_PERIOD and EPSILON_BIT. (IS_EPSILON_NODE): Just test if EPSILON_BIT is set. (ACCEPT_MB_NODE): Return 1 for OP_UTF8_PERIOD as well. * posix/regex_internal.c (create_ci_newstate, create_cd_newstate): Handle OP_UTF8_PERIOD. (re_string_reconstruct): Set valid_len for single byte char searching with no translation and case sensitivity. * posix/regcomp.c (re_compile_fastmap_iter, calc_first): Handle OP_UTF8_PERIOD. (re_compile_internal): Don't call optimize_utf8 if preg->translate != NULL. (optimize_utf8): Remove BACK_SLASH case. Transform OP_PERIOD into OP_UTF8_PERIOD if the searching can be optimized. (parse_bracket_exp): Don't create SIMPLE_BRACKET if it doesn't have any bits set and COMPLEX_BRACKET is used. * posix/regexec.c (transit_state_mb): Fix comment typo. (group_nodes_into_DFAstates, check_node_accept): Handle OP_UTF8_PERIOD. (check_node_accept_bytes): Likewise. Reorder slightly so that re_string_char_size_at and re_string_elem_size_at are called only when needed. * posix/bug-regex20.c (BRE, ERE): Define. (tests): Use them to make lines shorter. Expect . to be optimized. Add lots of new tests. (main): Run (ATM just case sensitive) test with backwards searching as well. 2003-11-18 Jakub Jelinek --- posix/regex_internal.h | 59 +++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 32 deletions(-) (limited to 'posix/regex_internal.h') diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 9fcf865f65..f905d2b510 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -167,8 +167,31 @@ typedef enum { NON_TYPE = 0, + /* Node type, These are used by token, node, tree. */ + CHARACTER = 1, + END_OF_RE = 2, + SIMPLE_BRACKET = 3, + OP_BACK_REF = 4, + OP_PERIOD = 5, +#ifdef RE_ENABLE_I18N + COMPLEX_BRACKET = 6, + OP_UTF8_PERIOD = 7, +#endif /* RE_ENABLE_I18N */ + + EPSILON_BIT = 8, + OP_OPEN_SUBEXP = EPSILON_BIT | 0, + OP_CLOSE_SUBEXP = EPSILON_BIT | 1, + OP_ALT = EPSILON_BIT | 2, + OP_DUP_ASTERISK = EPSILON_BIT | 3, + OP_DUP_PLUS = EPSILON_BIT | 4, + OP_DUP_QUESTION = EPSILON_BIT | 5, + ANCHOR = EPSILON_BIT | 6, + + /* Tree type, these are used only by tree. */ + CONCAT = 16, + /* Token type, these are used only by token. */ - OP_OPEN_BRACKET, + OP_OPEN_BRACKET = 17, OP_CLOSE_BRACKET, OP_CHARSET_RANGE, OP_OPEN_DUP_NUM, @@ -184,32 +207,8 @@ typedef enum OP_NOTWORD, OP_SPACE, OP_NOTSPACE, - BACK_SLASH, + BACK_SLASH - /* Tree type, these are used only by tree. */ - CONCAT, - ALT, - SUBEXP, - SIMPLE_BRACKET, -#ifdef RE_ENABLE_I18N - COMPLEX_BRACKET, -#endif /* RE_ENABLE_I18N */ - - /* Node type, These are used by token, node, tree. */ - OP_OPEN_SUBEXP, - OP_CLOSE_SUBEXP, - OP_PERIOD, - CHARACTER, - END_OF_RE, - OP_ALT, - OP_DUP_ASTERISK, - OP_DUP_PLUS, - OP_DUP_QUESTION, - OP_BACK_REF, - ANCHOR, - - /* Dummy marker. */ - END_OF_RE_TOKEN_T } re_token_type_t; #ifdef RE_ENABLE_I18N @@ -284,13 +283,9 @@ typedef struct #endif } re_token_t; -#define IS_EPSILON_NODE(type) \ - ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \ - || (type) == OP_DUP_QUESTION || (type) == ANCHOR \ - || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP) - +#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) #define ACCEPT_MB_NODE(type) \ - ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD) + ((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD) struct re_string_t { -- cgit v1.2.3