From 6291ee3c5fa34e3b1a9df315f24268b91c8ec89b Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 27 Nov 2002 23:00:16 +0000 Subject: Update. 2002-11-27 Isamu Hasegawa * posix/regcomp.c (parse_expression): Set the bit since the back reference is used in the regular expression. * posix/regex_internal.c (re_node_set_init_1): Make it clean in case of malloc failure. (re_node_set_init_copy): Likewise. * posix/regex_internal.h (state_array_t): New structure. (re_sub_match_last_t): Likewise. (re_sub_match_top_t): Likewise. (re_match_context_t): Add new members. (re_dfa_t): Likewise. * posix/regexec.c (re_search_internal): Invoke prune_impossible_nodes to check the matching is really correct, and retry if failed. Move the routin pruning the impossible nodes from here, ... (prune_impossible_nodes): To this function. (check_matching): Invoke check_subexp_matching_top, and replace redundant checking with transit_state_bkref invocation. (proceed_next_node): Replace strncmp with memcmp. Reported by Paolo Bonzini . (update_cur_sifted_state): Remove search_subexp invocation. (search_subexp): Remove this function. (check_dst_limits_calc_pos): Use search_cur_bkref_entry for optimization. (sift_states_bkref): Use search_cur_bkref_entry for optimization. Remove unused invocation of match_ctx_add_entry. (transit_state): Invoke check_subexp_matching_top. (check_subexp_matching_top): New function. (transit_state_bkref): Remove unused array. Merge transit_state_bkref_loop. (transit_state_bkref_loop): Use get_subexp instead of sift_states_backward. Use search_cur_bkref_entry for optimization. Merge this function to transit_state_bkref. (get_subexp): New function. (get_subexp_sub): Likewise. (find_subexp_node): Likewise. (check_arrival): Likewise. (check_arrival_expand_ecl): Likewise. (check_arrival_expand_ecl_sub): Likewise. (expand_bkref_cache): Likewise. (match_ctx_init): Initialize new members. (match_ctx_clean): New function. (match_ctx_free): Release new members. (match_ctx_free_subtops): New function. (match_ctx_add_entry): Fix indent. (search_cur_bkref_entry): New function. (match_ctx_add_subtop): Likewise. (match_ctx_add_sublast): Likewise. --- posix/regex_internal.h | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'posix/regex_internal.h') diff --git a/posix/regex_internal.h b/posix/regex_internal.h index a49f4d9f2f..50867878ed 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -401,6 +401,39 @@ struct re_state_table_entry re_dfastate_t **array; }; +/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */ + +typedef struct +{ + int next_idx; + int alloc; + re_dfastate_t **array; +} state_array_t; + +/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */ + +typedef struct +{ + int node; + int str_idx; /* The position NODE match at. */ + state_array_t path; +} re_sub_match_last_t; + +/* Store information about the node NODE whose type is OP_OPEN_SUBEXP. + And information about the node, whose type is OP_CLOSE_SUBEXP, + corresponding to NODE is stored in LASTS. */ + +typedef struct +{ + int str_idx; + int node; + int next_last_offset; + state_array_t *path; + int alasts; /* Allocation size of LASTS. */ + int nlasts; /* The number of LASTS. */ + re_sub_match_last_t **lasts; +} re_sub_match_top_t; + struct re_backref_cache_entry { int node; @@ -427,6 +460,9 @@ typedef struct int abkref_ents; struct re_backref_cache_entry *bkref_ents; int max_mb_elem_len; + int nsub_tops; + int asub_tops; + re_sub_match_top_t **sub_tops; } re_match_context_t; typedef struct @@ -484,13 +520,15 @@ struct re_dfa_t int states_alloc; int init_node; int nbackref; /* The number of backreference in this dfa. */ - /* If this dfa has "multibyte node", which is a backreference or - a node which can accept multibyte character or multi character - collating element. */ + /* Bitmap expressing which backreference is used. */ + unsigned int used_bkref_map; #ifdef DEBUG char* re_str; #endif unsigned int has_plural_match : 1; + /* If this dfa has "multibyte node", which is a backreference or + a node which can accept multibyte character or multi character + collating element. */ unsigned int has_mb_node : 1; }; typedef struct re_dfa_t re_dfa_t; -- cgit v1.2.3