diff options
Diffstat (limited to 'posix/regcomp.c')
-rw-r--r-- | posix/regcomp.c | 240 |
1 files changed, 142 insertions, 98 deletions
diff --git a/posix/regcomp.c b/posix/regcomp.c index 12da043062..0b85f7db4b 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -63,7 +63,7 @@ static void re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, char *fastmap); static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); -static void init_word_char (re_dfa_t *dfa); +static reg_errcode_t init_word_char (re_dfa_t *dfa); static void free_charset (re_charset_t *cset); static void free_workarea_compile (regex_t *preg); static reg_errcode_t create_initial_state (re_dfa_t *dfa); @@ -72,10 +72,11 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node); static void calc_first (re_dfa_t *dfa, bin_tree_t *node); static void calc_next (re_dfa_t *dfa, bin_tree_t *node); static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node); -static int duplicate_node (re_dfa_t *dfa, int org_idx, - unsigned int constraint); +static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, + unsigned int constraint); static reg_errcode_t calc_eclosure (re_dfa_t *dfa); -static re_node_set calc_eclosure_iter (re_dfa_t *dfa, int node, int root); +static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, + int node, int root); static void calc_inveclosure (re_dfa_t *dfa); static int fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax); @@ -446,8 +447,8 @@ regcomp (preg, pattern, cflags) if (ret == REG_ERPAREN) ret = REG_EPAREN; - /* XXX Why the test for preg->fastmap != NULL? */ - if (ret == REG_NOERROR && preg->fastmap != NULL) + /* We have already checked preg->fastmap != NULL. */ + if (ret == REG_NOERROR) { /* Compute the fastmap now, since regexec cannot modify the pattern buffer. */ @@ -772,16 +773,19 @@ init_dfa (dfa, pat_len) "word". In this case "word" means that it is the word construction character used by some operators like "\<", "\>", etc. */ -static void +static reg_errcode_t init_word_char (dfa) re_dfa_t *dfa; { int i, j, ch; dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1); + if (dfa->word_char == NULL) + return REG_ESPACE; for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if (isalnum (ch) || ch == '_') dfa->word_char[i] |= 1 << j; + return REG_NOERROR; } /* Free the work area which are only used while compiling. */ @@ -844,24 +848,28 @@ create_initial_state (dfa) } /* It must be the first time to invoke acquire_state. */ - dfa->init_state = re_acquire_state_context (dfa, &init_nodes, 0); + dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); + /* We don't check ERR here, since the initial state must not be NULL. */ + if (dfa->init_state == NULL) + return err; if (dfa->init_state->has_constraint) { - dfa->init_state_word = re_acquire_state_context (dfa, &init_nodes, + dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, CONTEXT_WORD); - dfa->init_state_nl = re_acquire_state_context (dfa, &init_nodes, + dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, CONTEXT_NEWLINE); - dfa->init_state_begbuf = re_acquire_state_context (dfa, &init_nodes, + dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, + &init_nodes, CONTEXT_NEWLINE | CONTEXT_BEGBUF); + if (dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL) + return err; } else dfa->init_state_word = dfa->init_state_nl = dfa->init_state_begbuf = dfa->init_state; - if (dfa->init_state == NULL || dfa->init_state_word == NULL - || dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL ) - return REG_ESPACE; re_node_set_free (&init_nodes); return REG_NOERROR; } @@ -1114,20 +1122,30 @@ calc_epsdest (dfa, node) } } -static int -duplicate_node (dfa, org_idx, constraint) +/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. + The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded, + otherwise return the error code. */ + +static reg_errcode_t +duplicate_node (new_idx, dfa, org_idx, constraint) re_dfa_t *dfa; - int org_idx; + int *new_idx, org_idx; unsigned int constraint; { re_token_t dup; int dup_idx; + reg_errcode_t err; dup.type = OP_CONTEXT_NODE; if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE) { + /* If the node whose index is ORG_IDX is the same as the intended + node, use it. */ if (dfa->nodes[org_idx].constraint == constraint) - return org_idx; + { + *new_idx = org_idx; + return REG_NOERROR; + } dup.constraint = constraint | dfa->nodes[org_idx].constraint; } @@ -1137,23 +1155,32 @@ duplicate_node (dfa, org_idx, constraint) /* In case that `entity' points OP_CONTEXT_NODE, we correct `entity' to real entity in calc_inveclosures(). */ dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info)); + dup_idx = re_dfa_add_node (dfa, dup, 1); + if (dup.opr.ctx_info == NULL || dup_idx == -1) + return REG_ESPACE; dup.opr.ctx_info->entity = org_idx; dup.opr.ctx_info->bkref_eclosure = NULL; - dup_idx = re_dfa_add_node (dfa, dup, 1); - dfa->nodes[dup_idx].duplicated = 1; + dfa->nodes[dup_idx].duplicated = 1; dfa->firsts[dup_idx] = dfa->firsts[org_idx]; dfa->nexts[dup_idx] = dfa->nexts[org_idx]; - re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx); + err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx); + if (err != REG_NOERROR) + return err; /* Since we don't duplicate epsilon nodes, epsilon closure have only itself. */ - re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx); - re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx); + err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx); + if (err != REG_NOERROR) + return err; + err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx); + if (err != REG_NOERROR) + return err; /* Then we must update inveclosure for this node. We process them at last part of calc_eclosure(), since we don't complete to calculate them here. */ - return dup_idx; + *new_idx = dup_idx; + return REG_NOERROR; } static void @@ -1193,6 +1220,7 @@ calc_eclosure (dfa) /* For each nodes, calculate epsilon closure. */ for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx) { + reg_errcode_t err; re_node_set eclosure_elem; if (node_idx == max) { @@ -1210,7 +1238,9 @@ calc_eclosure (dfa) if (dfa->eclosures[node_idx].nelem != 0) continue; /* Calculate epsilon closure of `node_idx'. */ - eclosure_elem = calc_eclosure_iter (dfa, node_idx, 1); + err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); + if (err != REG_NOERROR) + return err; if (dfa->eclosures[node_idx].nelem == 0) { @@ -1241,7 +1271,13 @@ calc_eclosure (dfa) { int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i]; if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type)) - dest_node_idx = duplicate_node (dfa, dest_node_idx, constraint); + { + reg_errcode_t err; + err = duplicate_node (&dest_node_idx, dfa, dest_node_idx, + constraint); + if (err != REG_NOERROR) + return err; + } re_node_set_insert (bkref_eclosure, dest_node_idx); } dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure; @@ -1252,15 +1288,19 @@ calc_eclosure (dfa) /* Calculate epsilon closure of NODE. */ -static re_node_set -calc_eclosure_iter (dfa, node, root) +static reg_errcode_t +calc_eclosure_iter (new_set, dfa, node, root) + re_node_set *new_set; re_dfa_t *dfa; int node, root; { + reg_errcode_t err; unsigned int constraint; int i, max, incomplete = 0; re_node_set eclosure; - re_node_set_alloc (&eclosure, 1); + err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); + if (err != REG_NOERROR) + return err; /* This indicates that we are calculating this node now. We reference this value to avoid infinite loop. */ @@ -1285,7 +1325,11 @@ calc_eclosure_iter (dfa, node, root) /* If we haven't calculated the epsilon closure of `edest' yet, calculate now. Otherwise use calculated epsilon closure. */ if (dfa->eclosures[edest].nelem == 0) - eclosure_elem = calc_eclosure_iter (dfa, edest, 0); + { + err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); + if (err != REG_NOERROR) + return err; + } else eclosure_elem = dfa->eclosures[edest]; /* Merge the epsilon closure of `edest'. */ @@ -1307,7 +1351,11 @@ calc_eclosure_iter (dfa, node, root) int dest = eclosure.elems[i]; if (!IS_EPSILON_NODE (dfa->nodes[dest].type)) { - int dup_dest = duplicate_node (dfa, dest, constraint); + int dup_dest; + reg_errcode_t err; + err = duplicate_node (&dup_dest, dfa, dest, constraint); + if (err != REG_NOERROR) + return err; if (dest != dup_dest) { re_node_set_remove_at (&eclosure, i--); @@ -1323,7 +1371,8 @@ calc_eclosure_iter (dfa, node, root) dfa->eclosures[node].nelem = 0; else dfa->eclosures[node] = eclosure; - return eclosure; + *new_set = eclosure; + return REG_NOERROR; } /* Functions for token which are used in the parser. */ @@ -1865,7 +1914,11 @@ parse_expression (regexp, preg, token, syntax, nest, err) break; case ANCHOR: if (dfa->word_char == NULL) - init_word_char (dfa); + { + *err = init_word_char (dfa); + if (*err != REG_NOERROR) + return NULL; + } if (token->opr.ctx_type == WORD_DELIM) { bin_tree_t *tree_first, *tree_last; @@ -2137,28 +2190,6 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err) I'm not sure, but maybe enough. */ #define BRACKET_NAME_BUF_SIZE 32 -static inline void * -extend_array_for_cset (array, num, alloc, type_size) - void *array; - int num, *alloc, type_size; -{ - void *new_array = array; - if (*alloc == num) - { - if (*alloc == 0) - { - new_array = malloc (type_size); - *alloc = 1; - } - else - { - new_array = realloc (array, type_size * num * 2); - *alloc = 2 * num; - } - } - return new_array; -} - /* This function parse bracket expression like "[abc]", "[a-c]", "[[.a-a.]]" etc. */ @@ -2299,22 +2330,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) uint32_t *new_array_end; int new_nranges; - /* XXX If mbcset->range_starts and mbcset->range_ends are NULL - if *range_alloc == 0 then we do not need the if. */ - if (*range_alloc == 0) - { - new_nranges = 1; - new_array_start = re_malloc (uint32_t, 1); - new_array_end = re_malloc (uint32_t, 1); - } - else - { - new_nranges = 2 * mbcset->nranges; - new_array_start = re_realloc (mbcset->range_starts, uint32_t, - new_nranges); - new_array_end = re_realloc (mbcset->range_ends, uint32_t, - new_nranges); - } + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. */ + new_array_start = re_realloc (mbcset->range_starts, uint32_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, uint32_t, + new_nranges); + if (new_array_start == NULL || new_array_end == NULL) return REG_ESPACE; @@ -2394,13 +2418,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) /* Got valid collation sequence, add it as a new entry. */ /* Check the space of the arrays. */ - mbcset->coll_syms = extend_array_for_cset (mbcset->coll_syms, - mbcset->ncoll_syms, - coll_sym_alloc, - sizeof (int32_t)); - if (mbcset->coll_syms == NULL) - return REG_ESPACE; - + if (*coll_sym_alloc == mbcset->ncoll_syms) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->ncoll_syms is 0. */ + *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; + /* Use realloc since mbcset->coll_syms is NULL + if *alloc == 0. */ + mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t, + *coll_sym_alloc); + if (mbcset->coll_syms == NULL) + return REG_ESPACE; + } mbcset->coll_syms[mbcset->ncoll_syms++] = idx; return REG_NOERROR; } @@ -2557,12 +2586,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) bitset_set (sbcset, start_elem.opr.ch); break; case MB_CHAR: - mbcset->mbchars = extend_array_for_cset (mbcset->mbchars, - mbcset->nmbchars, - &mbchar_alloc, - sizeof (wchar_t)); - if (mbcset->mbchars == NULL) - goto parse_bracket_exp_espace; + /* Check whether the array has enough space. */ + if (mbchar_alloc == mbcset->nmbchars) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nmbchars is 0. */ + mbchar_alloc = 2 * mbcset->nmbchars + 1; + /* Use realloc since array is NULL if *alloc == 0. */ + mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t, + mbchar_alloc); + if (mbcset->mbchars == NULL) + goto parse_bracket_exp_espace; + } mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; break; case EQUIV_CLASS: @@ -2779,14 +2814,18 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name) bitset_set (sbcset, ch); } } - /* Check the space of the arrays, and extend if we need. */ - mbcset->equiv_classes = extend_array_for_cset (mbcset->equiv_classes, - mbcset->nequiv_classes, - equiv_class_alloc, - sizeof (int32_t)); - if (mbcset->equiv_classes == NULL) - return REG_ESPACE; - + /* Check whether the array has enough space. */ + if (*equiv_class_alloc == mbcset->nequiv_classes) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nequiv_classes is 0. */ + *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; + /* Use realloc since the array is NULL if *alloc == 0. */ + mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t, + *equiv_class_alloc); + if (mbcset->equiv_classes == NULL) + return REG_ESPACE; + } mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; } else @@ -2815,12 +2854,17 @@ build_charclass (mbcset, sbcset, char_class_alloc, name) int i; /* Check the space of the arrays. */ - mbcset->char_classes = extend_array_for_cset (mbcset->char_classes, - mbcset->nchar_classes, - char_class_alloc, - sizeof (wctype_t)); - if (mbcset->char_classes == NULL) - return REG_ESPACE; + if (*char_class_alloc == mbcset->nchar_classes) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nchar_classes is 0. */ + *char_class_alloc = 2 * mbcset->nchar_classes + 1; + /* Use realloc since array is NULL if *alloc == 0. */ + mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t, + *char_class_alloc); + if (mbcset->char_classes == NULL) + return REG_ESPACE; + } mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); |