about | summary | refs | log | tree | commit | diff
path: root/posix/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'posix/regcomp.c')
-rw-r--r-- posix/regcomp.c | 178
1 files changed, 84 insertions, 94 deletions
diff --git a/posix/regcomp.c b/posix/regcomp.c
index adb9d04d8a..4f53944709 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -126,8 +126,8 @@ static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans,
static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
unsigned RE_TRANSLATE_TYPE trans,
const unsigned char *class_name,
- const unsigned char *extra, int not,
- reg_errcode_t *err);
+ const unsigned char *extra,
+ int non_match, reg_errcode_t *err);
static bin_tree_t *create_tree (re_dfa_t *dfa,
bin_tree_t *left, bin_tree_t *right,
re_token_type_t type, int index);
@@ -862,11 +862,9 @@ init_dfa (dfa, pat_len)
dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
if (BE (dfa->sb_char == NULL, 0))
return REG_ESPACE;
-#ifdef _LIBC
if (dfa->is_utf8)
memset (dfa->sb_char, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
else
-#endif
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch)
if (btowc (ch) != WEOF)
@@ -2567,33 +2565,41 @@ build_range_exp (sbcset, start_elem, end_elem)
if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
return REG_ERANGE;
- /* Check the space of the arrays. */
- if (BE (*range_alloc == mbcset->nranges, 0))
+ /* Got valid collation sequence values, add them as a new entry.
+ However, for !_LIBC we have no collation elements: if the
+ character set is single byte, the single byte character set
+ that we build below suffices. parse_bracket_exp passes
+ no MBCSET if dfa->mb_cur_max == 1. */
+ if (mbcset)
{
- /* There are not enough space, need realloc. */
- wchar_t *new_array_start, *new_array_end;
- int new_nranges;
-
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- /* Use realloc since mbcset->range_starts and mbcset->range_ends
- are NULL if *range_alloc == 0. */
- new_array_start = re_realloc (mbcset->range_starts, wchar_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, wchar_t,
- new_nranges);
-
- if (BE (new_array_start == NULL || new_array_end == NULL, 0))
- return REG_ESPACE;
-
- mbcset->range_starts = new_array_start;
- mbcset->range_ends = new_array_end;
- *range_alloc = new_nranges;
+ /* Check the space of the arrays. */
+ if (BE (*range_alloc == mbcset->nranges, 0))
+ {
+ /* There is not enough space, need realloc. */
+ wchar_t *new_array_start, *new_array_end;
+ int new_nranges;
+
+ /* +1 in case mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ /* Use realloc since mbcset->range_starts and mbcset->range_ends
+ are NULL if *range_alloc == 0. */
+ new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+ new_nranges);
+
+ if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+ return REG_ESPACE;
+
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
+ }
+
+ mbcset->range_starts[mbcset->nranges] = start_wc;
+ mbcset->range_ends[mbcset->nranges++] = end_wc;
}
- mbcset->range_starts[mbcset->nranges] = start_wc;
- mbcset->range_ends[mbcset->nranges++] = end_wc;
-
/* Build the table for single byte characters. */
for (wc = 0; wc <= SBC_MAX; ++wc)
{
@@ -2779,13 +2785,9 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
static inline reg_errcode_t
__attribute ((always_inline))
-# ifdef RE_ENABLE_I18N
build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
re_charset_t *mbcset;
int *range_alloc;
-# else /* not RE_ENABLE_I18N */
- build_range_exp (sbcset, start_elem, end_elem)
-# endif /* not RE_ENABLE_I18N */
re_bitset_ptr_t sbcset;
bracket_elem_t *start_elem, *end_elem;
{
@@ -2793,33 +2795,6 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
uint32_t start_collseq;
uint32_t end_collseq;
-# ifdef RE_ENABLE_I18N
- /* Check the space of the arrays. */
- if (BE (*range_alloc == mbcset->nranges, 0))
- {
- /* There are not enough space, need realloc. */
- uint32_t *new_array_start;
- uint32_t *new_array_end;
- int new_nranges;
-
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- /* Use realloc since mbcset->range_starts and mbcset->range_ends
- are NULL if *range_alloc == 0. */
- new_array_start = re_realloc (mbcset->range_starts, uint32_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, uint32_t,
- new_nranges);
-
- if (BE (new_array_start == NULL || new_array_end == NULL, 0))
- return REG_ESPACE;
-
- mbcset->range_starts = new_array_start;
- mbcset->range_ends = new_array_end;
- *range_alloc = new_nranges;
- }
-# endif /* RE_ENABLE_I18N */
-
/* Equivalence Classes and Character Classes can't be a range
start/end. */
if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
@@ -2835,11 +2810,38 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
return REG_ERANGE;
-# ifdef RE_ENABLE_I18N
- /* Got valid collation sequence values, add them as a new entry. */
- mbcset->range_starts[mbcset->nranges] = start_collseq;
- mbcset->range_ends[mbcset->nranges++] = end_collseq;
-# endif /* RE_ENABLE_I18N */
+ /* Got valid collation sequence values, add them as a new entry.
+ However, if we have no collation elements, and the character set
+ is single byte, the single byte character set that we
+ build below suffices. */
+ if (nrules > 0 || dfa->mb_cur_max > 1)
+ {
+ /* Check the space of the arrays. */
+ if (BE (*range_alloc == mbcset->nranges, 0))
+ {
+ /* There is not enough space, need realloc. */
+ uint32_t *new_array_start;
+ uint32_t *new_array_end;
+ int new_nranges;
+
+ /* +1 in case mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+ new_nranges);
+
+ if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+ return REG_ESPACE;
+
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
+ }
+
+ mbcset->range_starts[mbcset->nranges] = start_collseq;
+ mbcset->range_ends[mbcset->nranges++] = end_collseq;
+ }
/* Build the table for single byte characters. */
for (ch = 0; ch <= SBC_MAX; ch++)
@@ -2866,13 +2868,9 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
static inline reg_errcode_t
__attribute ((always_inline))
-# ifdef RE_ENABLE_I18N
build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
re_charset_t *mbcset;
int *coll_sym_alloc;
-# else /* not RE_ENABLE_I18N */
- build_collating_symbol (sbcset, name)
-# endif /* not RE_ENABLE_I18N */
re_bitset_ptr_t sbcset;
const unsigned char *name;
{
@@ -2898,7 +2896,6 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
else
return REG_ECOLLATE;
-# ifdef RE_ENABLE_I18N
/* Got valid collation sequence, add it as a new entry. */
/* Check the space of the arrays. */
if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
@@ -2916,7 +2913,6 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
*coll_sym_alloc = new_coll_sym_alloc;
}
mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
-# endif /* RE_ENABLE_I18N */
return REG_NOERROR;
}
else
@@ -2938,9 +2934,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
re_charset_t *mbcset;
int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
int equiv_class_alloc = 0, char_class_alloc = 0;
-#else /* not RE_ENABLE_I18N */
- int non_match = 0;
#endif /* RE_ENABLE_I18N */
+ int non_match = 0;
bin_tree_t *work_tree;
int token_len;
int first_round = 1;
@@ -2985,9 +2980,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
{
#ifdef RE_ENABLE_I18N
mbcset->non_match = 1;
-#else /* not RE_ENABLE_I18N */
- non_match = 1;
#endif /* RE_ENABLE_I18N */
+ non_match = 1;
if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
bitset_set (sbcset, '\0');
re_string_skip_bytes (regexp, token_len); /* Skip a token. */
@@ -3066,11 +3060,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
token_len = peek_token_bracket (token, regexp, syntax);
+#ifdef _LIBC
+ *err = build_range_exp (sbcset, mbcset, &range_alloc,
+ &start_elem, &end_elem);
+#else
+# ifdef RE_ENABLE_I18N
*err = build_range_exp (sbcset,
-#ifdef RE_ENABLE_I18N
- mbcset, &range_alloc,
+ dfa->mb_cur_max > 1 ? mbcset : NULL,
+ &range_alloc, &start_elem, &end_elem);
+# else
+ *err = build_range_exp (sbcset, &start_elem, &end_elem);
+# endif
#endif /* not _LIBC */
- &start_elem, &end_elem);
if (BE (*err != REG_NOERROR, 0))
goto parse_bracket_exp_free_return;
}
@@ -3144,12 +3145,9 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
re_string_skip_bytes (regexp, token_len); /* Skip a token. */
/* If it is non-matching list. */
-#ifdef RE_ENABLE_I18N
- if (mbcset->non_match)
-#else /* not RE_ENABLE_I18N */
if (non_match)
-#endif /* not RE_ENABLE_I18N */
bitset_not (sbcset);
+
#ifdef RE_ENABLE_I18N
/* Ensure only single byte characters are set. */
if (dfa->mb_cur_max > 1)
@@ -3324,7 +3322,7 @@ build_equiv_class (sbcset, name)
re_bitset_ptr_t sbcset;
const unsigned char *name;
{
-#if defined _LIBC && defined RE_ENABLE_I18N
+#if defined _LIBC
uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules != 0)
{
@@ -3393,7 +3391,7 @@ build_equiv_class (sbcset, name)
mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
}
else
-#endif /* _LIBC && RE_ENABLE_I18N */
+#endif /* _LIBC */
{
if (BE (strlen ((const char *) name) != 1, 0))
return REG_ECOLLATE;
@@ -3489,20 +3487,18 @@ build_charclass (trans, sbcset, class_name, syntax)
}
static bin_tree_t *
-build_charclass_op (dfa, trans, class_name, extra, not, err)
+build_charclass_op (dfa, trans, class_name, extra, non_match, err)
re_dfa_t *dfa;
unsigned RE_TRANSLATE_TYPE trans;
const unsigned char *class_name;
const unsigned char *extra;
- int not;
+ int non_match;
reg_errcode_t *err;
{
re_bitset_ptr_t sbcset;
#ifdef RE_ENABLE_I18N
re_charset_t *mbcset;
int alloc = 0;
-#else /* not RE_ENABLE_I18N */
- int non_match = 0;
#endif /* RE_ENABLE_I18N */
reg_errcode_t ret;
re_token_t br_token;
@@ -3523,7 +3519,7 @@ build_charclass_op (dfa, trans, class_name, extra, not, err)
return NULL;
}
- if (not)
+ if (non_match)
{
#ifdef RE_ENABLE_I18N
/*
@@ -3531,8 +3527,6 @@ build_charclass_op (dfa, trans, class_name, extra, not, err)
bitset_set(cset->sbcset, '\0');
*/
mbcset->non_match = 1;
-#else /* not RE_ENABLE_I18N */
- non_match = 1;
#endif /* RE_ENABLE_I18N */
}
@@ -3557,11 +3551,7 @@ build_charclass_op (dfa, trans, class_name, extra, not, err)
bitset_set (sbcset, *extra);
/* If it is non-matching list. */
-#ifdef RE_ENABLE_I18N
- if (mbcset->non_match)
-#else /* not RE_ENABLE_I18N */
if (non_match)
-#endif /* not RE_ENABLE_I18N */
bitset_not (sbcset);
#ifdef RE_ENABLE_I18N