diff options
author | Ulrich Drepper <drepper@redhat.com> | 2003-11-18 23:40:59 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2003-11-18 23:40:59 +0000 |
commit | ad7f28c29d06ddb4506d0d75e089732740b5bd2b (patch) | |
tree | de9ee40c2d213a4113e3da2b8cedc3d505386bc1 /posix/regex_internal.h | |
parent | 5146ec9a2c092cb74b5cd0eb8b5e938b46f1631b (diff) | |
download | glibc-ad7f28c29d06ddb4506d0d75e089732740b5bd2b.tar glibc-ad7f28c29d06ddb4506d0d75e089732740b5bd2b.tar.gz glibc-ad7f28c29d06ddb4506d0d75e089732740b5bd2b.tar.bz2 glibc-ad7f28c29d06ddb4506d0d75e089732740b5bd2b.zip |
Update.
* posix/regex_internal.h (re_token_type_t): Remove unused ALT,
END_OF_RE_TOKEN_T and SUBEXP. Reorder values. Add OP_UTF8_PERIOD
and EPSILON_BIT.
(IS_EPSILON_NODE): Just test if EPSILON_BIT is set.
(ACCEPT_MB_NODE): Return 1 for OP_UTF8_PERIOD as well.
* posix/regex_internal.c (create_ci_newstate, create_cd_newstate):
Handle OP_UTF8_PERIOD.
(re_string_reconstruct): Set valid_len for single byte char searching
with no translation and case sensitivity.
* posix/regcomp.c (re_compile_fastmap_iter, calc_first): Handle
OP_UTF8_PERIOD.
(re_compile_internal): Don't call optimize_utf8 if preg->translate
!= NULL.
(optimize_utf8): Remove BACK_SLASH case.
Transform OP_PERIOD into OP_UTF8_PERIOD if the searching can be
optimized.
(parse_bracket_exp): Don't create SIMPLE_BRACKET if it doesn't have
any bits set and COMPLEX_BRACKET is used.
* posix/regexec.c (transit_state_mb): Fix comment typo.
(group_nodes_into_DFAstates, check_node_accept): Handle
OP_UTF8_PERIOD.
(check_node_accept_bytes): Likewise. Reorder slightly so that
re_string_char_size_at and re_string_elem_size_at are called
only when needed.
* posix/bug-regex20.c (BRE, ERE): Define.
(tests): Use them to make lines shorter. Expect . to be
optimized. Add lots of new tests.
(main): Run (ATM just case sensitive) test with backwards searching
as well.
2003-11-18 Jakub Jelinek <jakub@redhat.com>
Diffstat (limited to 'posix/regex_internal.h')
-rw-r--r-- | posix/regex_internal.h | 59 |
1 file changed, 27 insertions, 32 deletions
```diff
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 9fcf865f65..f905d2b510 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -167,8 +167,31 @@ typedef enum
 {
   NON_TYPE = 0,
 
+  /* Node type, These are used by token, node, tree. */
+  CHARACTER = 1,
+  END_OF_RE = 2,
+  SIMPLE_BRACKET = 3,
+  OP_BACK_REF = 4,
+  OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+  COMPLEX_BRACKET = 6,
+  OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+  EPSILON_BIT = 8,
+  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+  OP_ALT = EPSILON_BIT | 2,
+  OP_DUP_ASTERISK = EPSILON_BIT | 3,
+  OP_DUP_PLUS = EPSILON_BIT | 4,
+  OP_DUP_QUESTION = EPSILON_BIT | 5,
+  ANCHOR = EPSILON_BIT | 6,
+
+  /* Tree type, these are used only by tree. */
+  CONCAT = 16,
+
   /* Token type, these are used only by token. */
-  OP_OPEN_BRACKET,
+  OP_OPEN_BRACKET = 17,
   OP_CLOSE_BRACKET,
   OP_CHARSET_RANGE,
   OP_OPEN_DUP_NUM,
@@ -184,32 +207,8 @@ typedef enum
   OP_NOTWORD,
   OP_SPACE,
   OP_NOTSPACE,
-  BACK_SLASH,
+  BACK_SLASH
 
-  /* Tree type, these are used only by tree. */
-  CONCAT,
-  ALT,
-  SUBEXP,
-  SIMPLE_BRACKET,
-#ifdef RE_ENABLE_I18N
-  COMPLEX_BRACKET,
-#endif /* RE_ENABLE_I18N */
-
-  /* Node type, These are used by token, node, tree. */
-  OP_OPEN_SUBEXP,
-  OP_CLOSE_SUBEXP,
-  OP_PERIOD,
-  CHARACTER,
-  END_OF_RE,
-  OP_ALT,
-  OP_DUP_ASTERISK,
-  OP_DUP_PLUS,
-  OP_DUP_QUESTION,
-  OP_BACK_REF,
-  ANCHOR,
-
-  /* Dummy marker. */
-  END_OF_RE_TOKEN_T
 } re_token_type_t;
 
 #ifdef RE_ENABLE_I18N
@@ -284,13 +283,9 @@ typedef struct
 #endif
 } re_token_t;
 
-#define IS_EPSILON_NODE(type) \
-  ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \
-   || (type) == OP_DUP_QUESTION || (type) == ANCHOR \
-   || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP)
-
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
 #define ACCEPT_MB_NODE(type) \
-  ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD)
+  ((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD)
 
 struct re_string_t
 {
```