From 10677727e24ccda9f364aa1f043a97a9204947a9 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 14 Jan 2004 02:42:20 +0000 Subject: Update. 2004-01-12 Paolo Bonzini * posix/regcomp.c [_LIBC && !RE_ENABLE_I18N]: Drop code to support this, it is never true. (build_range_exp) [!_LIBC]: Do not create a range in MBCSET for a single-byte character set. (build_range_exp) [_LIBC]: Do not create a range in MBCSET for a single-byte character set without collation elements. (init_dfa): Do not conditionalize on _LIBC, it just makes the code less clear. (parse_bracket_exp): Use NON_MATCH variable in addition to "mbcset->non_match", not as an alternative. (build_charclass_op): rename NOT parameter to NON_MATCH, use it instead of declaring a variable. (parse_bracket_exp) [!_LIBC]: Pass NULL for MBCSET if the character set is single-byte. --- posix/regcomp.c | 178 ++++++++++++++++++++++++++------------------------------ 1 file changed, 84 insertions(+), 94 deletions(-) (limited to 'posix') diff --git a/posix/regcomp.c b/posix/regcomp.c index adb9d04d8a..4f53944709 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -126,8 +126,8 @@ static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans, static bin_tree_t *build_charclass_op (re_dfa_t *dfa, unsigned RE_TRANSLATE_TYPE trans, const unsigned char *class_name, - const unsigned char *extra, int not, - reg_errcode_t *err); + const unsigned char *extra, + int non_match, reg_errcode_t *err); static bin_tree_t *create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, re_token_type_t type, int index); @@ -862,11 +862,9 @@ init_dfa (dfa, pat_len) dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1); if (BE (dfa->sb_char == NULL, 0)) return REG_ESPACE; -#ifdef _LIBC if (dfa->is_utf8) memset (dfa->sb_char, 255, sizeof (unsigned int) * BITSET_UINTS / 2); else -#endif for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if (btowc (ch) != WEOF) @@ -2567,33 +2565,41 @@ build_range_exp (sbcset, start_elem, end_elem) if (wcscoll (cmp_buf, cmp_buf + 4) > 0) return REG_ERANGE; - /* Check the space of the arrays. */ - if (BE (*range_alloc == mbcset->nranges, 0)) + /* Got valid collation sequence values, add them as a new entry. + However, for !_LIBC we have no collation elements: if the + character set is single byte, the single byte character set + that we build below suffices. parse_bracket_exp passes + no MBCSET if dfa->mb_cur_max == 1. */ + if (mbcset) { - /* There are not enough space, need realloc. */ - wchar_t *new_array_start, *new_array_end; - int new_nranges; - - /* +1 in case of mbcset->nranges is 0. */ - new_nranges = 2 * mbcset->nranges + 1; - /* Use realloc since mbcset->range_starts and mbcset->range_ends - are NULL if *range_alloc == 0. */ - new_array_start = re_realloc (mbcset->range_starts, wchar_t, - new_nranges); - new_array_end = re_realloc (mbcset->range_ends, wchar_t, - new_nranges); - - if (BE (new_array_start == NULL || new_array_end == NULL, 0)) - return REG_ESPACE; - - mbcset->range_starts = new_array_start; - mbcset->range_ends = new_array_end; - *range_alloc = new_nranges; + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + wchar_t *new_array_start, *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. */ + new_array_start = re_realloc (mbcset->range_starts, wchar_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, wchar_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_wc; + mbcset->range_ends[mbcset->nranges++] = end_wc; } - mbcset->range_starts[mbcset->nranges] = start_wc; - mbcset->range_ends[mbcset->nranges++] = end_wc; - /* Build the table for single byte characters. */ for (wc = 0; wc <= SBC_MAX; ++wc) { @@ -2779,13 +2785,9 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) static inline reg_errcode_t __attribute ((always_inline)) -# ifdef RE_ENABLE_I18N build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) re_charset_t *mbcset; int *range_alloc; -# else /* not RE_ENABLE_I18N */ - build_range_exp (sbcset, start_elem, end_elem) -# endif /* not RE_ENABLE_I18N */ re_bitset_ptr_t sbcset; bracket_elem_t *start_elem, *end_elem; { @@ -2793,33 +2795,6 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) uint32_t start_collseq; uint32_t end_collseq; -# ifdef RE_ENABLE_I18N - /* Check the space of the arrays. */ - if (BE (*range_alloc == mbcset->nranges, 0)) - { - /* There are not enough space, need realloc. */ - uint32_t *new_array_start; - uint32_t *new_array_end; - int new_nranges; - - /* +1 in case of mbcset->nranges is 0. */ - new_nranges = 2 * mbcset->nranges + 1; - /* Use realloc since mbcset->range_starts and mbcset->range_ends - are NULL if *range_alloc == 0. */ - new_array_start = re_realloc (mbcset->range_starts, uint32_t, - new_nranges); - new_array_end = re_realloc (mbcset->range_ends, uint32_t, - new_nranges); - - if (BE (new_array_start == NULL || new_array_end == NULL, 0)) - return REG_ESPACE; - - mbcset->range_starts = new_array_start; - mbcset->range_ends = new_array_end; - *range_alloc = new_nranges; - } -# endif /* RE_ENABLE_I18N */ - /* Equivalence Classes and Character Classes can't be a range start/end. */ if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS @@ -2835,11 +2810,38 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) return REG_ERANGE; -# ifdef RE_ENABLE_I18N - /* Got valid collation sequence values, add them as a new entry. */ - mbcset->range_starts[mbcset->nranges] = start_collseq; - mbcset->range_ends[mbcset->nranges++] = end_collseq; -# endif /* RE_ENABLE_I18N */ + /* Got valid collation sequence values, add them as a new entry. + However, if we have no collation elements, and the character set + is single byte, the single byte character set that we + build below suffices. */ + if (nrules > 0 || dfa->mb_cur_max > 1) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + uint32_t *new_array_start; + uint32_t *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + new_array_start = re_realloc (mbcset->range_starts, uint32_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, uint32_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_collseq; + mbcset->range_ends[mbcset->nranges++] = end_collseq; + } /* Build the table for single byte characters. */ for (ch = 0; ch <= SBC_MAX; ch++) @@ -2866,13 +2868,9 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) static inline reg_errcode_t __attribute ((always_inline)) -# ifdef RE_ENABLE_I18N build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) re_charset_t *mbcset; int *coll_sym_alloc; -# else /* not RE_ENABLE_I18N */ - build_collating_symbol (sbcset, name) -# endif /* not RE_ENABLE_I18N */ re_bitset_ptr_t sbcset; const unsigned char *name; { @@ -2898,7 +2896,6 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) else return REG_ECOLLATE; -# ifdef RE_ENABLE_I18N /* Got valid collation sequence, add it as a new entry. */ /* Check the space of the arrays. */ if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) @@ -2916,7 +2913,6 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) *coll_sym_alloc = new_coll_sym_alloc; } mbcset->coll_syms[mbcset->ncoll_syms++] = idx; -# endif /* RE_ENABLE_I18N */ return REG_NOERROR; } else @@ -2938,9 +2934,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) re_charset_t *mbcset; int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; int equiv_class_alloc = 0, char_class_alloc = 0; -#else /* not RE_ENABLE_I18N */ - int non_match = 0; #endif /* not RE_ENABLE_I18N */ + int non_match = 0; bin_tree_t *work_tree; int token_len; int first_round = 1; @@ -2985,9 +2980,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) { #ifdef RE_ENABLE_I18N mbcset->non_match = 1; -#else /* not RE_ENABLE_I18N */ - non_match = 1; #endif /* not RE_ENABLE_I18N */ + non_match = 1; if (syntax & RE_HAT_LISTS_NOT_NEWLINE) bitset_set (sbcset, '\0'); re_string_skip_bytes (regexp, token_len); /* Skip a token. */ @@ -3066,11 +3060,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) token_len = peek_token_bracket (token, regexp, syntax); +#ifdef _LIBC + *err = build_range_exp (sbcset, mbcset, &range_alloc, + &start_elem, &end_elem); +#else +# ifdef RE_ENABLE_I18N *err = build_range_exp (sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &range_alloc, + dfa->mb_cur_max > 1 ? mbcset : NULL, + &range_alloc, &start_elem, &end_elem); +# else + *err = build_range_exp (sbcset, &start_elem, &end_elem); +# endif #endif /* RE_ENABLE_I18N */ - &start_elem, &end_elem); if (BE (*err != REG_NOERROR, 0)) goto parse_bracket_exp_free_return; } @@ -3144,12 +3145,9 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) re_string_skip_bytes (regexp, token_len); /* Skip a token. */ /* If it is non-matching list. */ -#ifdef RE_ENABLE_I18N - if (mbcset->non_match) -#else /* not RE_ENABLE_I18N */ if (non_match) -#endif /* not RE_ENABLE_I18N */ bitset_not (sbcset); + #ifdef RE_ENABLE_I18N /* Ensure only single byte characters are set. */ if (dfa->mb_cur_max > 1) @@ -3324,7 +3322,7 @@ build_equiv_class (sbcset, name) re_bitset_ptr_t sbcset; const unsigned char *name; { -#if defined _LIBC && defined RE_ENABLE_I18N +#if defined _LIBC uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) { @@ -3393,7 +3391,7 @@ build_equiv_class (sbcset, name) mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; } else -#endif /* _LIBC && RE_ENABLE_I18N */ +#endif /* _LIBC */ { if (BE (strlen ((const char *) name) != 1, 0)) return REG_ECOLLATE; @@ -3489,20 +3487,18 @@ build_charclass (trans, sbcset, class_name, syntax) } static bin_tree_t * -build_charclass_op (dfa, trans, class_name, extra, not, err) +build_charclass_op (dfa, trans, class_name, extra, non_match, err) re_dfa_t *dfa; unsigned RE_TRANSLATE_TYPE trans; const unsigned char *class_name; const unsigned char *extra; - int not; + int non_match; reg_errcode_t *err; { re_bitset_ptr_t sbcset; #ifdef RE_ENABLE_I18N re_charset_t *mbcset; int alloc = 0; -#else /* not RE_ENABLE_I18N */ - int non_match = 0; #endif /* not RE_ENABLE_I18N */ reg_errcode_t ret; re_token_t br_token; @@ -3523,7 +3519,7 @@ build_charclass_op (dfa, trans, class_name, extra, not, err) return NULL; } - if (not) + if (non_match) { #ifdef RE_ENABLE_I18N /* @@ -3531,8 +3527,6 @@ build_charclass_op (dfa, trans, class_name, extra, not, err) bitset_set(cset->sbcset, '\0'); */ mbcset->non_match = 1; -#else /* not RE_ENABLE_I18N */ - non_match = 1; #endif /* not RE_ENABLE_I18N */ } @@ -3557,11 +3551,7 @@ build_charclass_op (dfa, trans, class_name, extra, not, err) bitset_set (sbcset, *extra); /* If it is non-matching list. */ -#ifdef RE_ENABLE_I18N - if (mbcset->non_match) -#else /* not RE_ENABLE_I18N */ if (non_match) -#endif /* not RE_ENABLE_I18N */ bitset_not (sbcset); #ifdef RE_ENABLE_I18N -- cgit v1.2.3