aboutsummaryrefslogtreecommitdiff
path: root/posix
diff options
context:
space:
mode:
Diffstat (limited to 'posix')
-rw-r--r--posix/regcomp.c240
-rw-r--r--posix/regex_internal.c139
-rw-r--r--posix/regex_internal.h5
-rw-r--r--posix/regexec.c468
4 files changed, 569 insertions, 283 deletions
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 12da043062..0b85f7db4b 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -63,7 +63,7 @@ static void re_compile_fastmap_iter (regex_t *bufp,
const re_dfastate_t *init_state,
char *fastmap);
static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
-static void init_word_char (re_dfa_t *dfa);
+static reg_errcode_t init_word_char (re_dfa_t *dfa);
static void free_charset (re_charset_t *cset);
static void free_workarea_compile (regex_t *preg);
static reg_errcode_t create_initial_state (re_dfa_t *dfa);
@@ -72,10 +72,11 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
-static int duplicate_node (re_dfa_t *dfa, int org_idx,
- unsigned int constraint);
+static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
+ unsigned int constraint);
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
-static re_node_set calc_eclosure_iter (re_dfa_t *dfa, int node, int root);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+ int node, int root);
static void calc_inveclosure (re_dfa_t *dfa);
static int fetch_number (re_string_t *input, re_token_t *token,
reg_syntax_t syntax);
@@ -446,8 +447,8 @@ regcomp (preg, pattern, cflags)
if (ret == REG_ERPAREN)
ret = REG_EPAREN;
- /* XXX Why the test for preg->fastmap != NULL? */
- if (ret == REG_NOERROR && preg->fastmap != NULL)
+ /* We have already checked preg->fastmap != NULL. */
+ if (ret == REG_NOERROR)
{
/* Compute the fastmap now, since regexec cannot modify the pattern
buffer. */
@@ -772,16 +773,19 @@ init_dfa (dfa, pat_len)
"word". In this case "word" means that it is the word construction
character used by some operators like "\<", "\>", etc. */
-static void
+static reg_errcode_t
init_word_char (dfa)
re_dfa_t *dfa;
{
int i, j, ch;
dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
+ if (dfa->word_char == NULL)
+ return REG_ESPACE;
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_')
dfa->word_char[i] |= 1 << j;
+ return REG_NOERROR;
}
/* Free the work area which are only used while compiling. */
@@ -844,24 +848,28 @@ create_initial_state (dfa)
}
/* It must be the first time to invoke acquire_state. */
- dfa->init_state = re_acquire_state_context (dfa, &init_nodes, 0);
+ dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+ /* We don't check ERR here, since the initial state must not be NULL. */
+ if (dfa->init_state == NULL)
+ return err;
if (dfa->init_state->has_constraint)
{
- dfa->init_state_word = re_acquire_state_context (dfa, &init_nodes,
+ dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
CONTEXT_WORD);
- dfa->init_state_nl = re_acquire_state_context (dfa, &init_nodes,
+ dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
CONTEXT_NEWLINE);
- dfa->init_state_begbuf = re_acquire_state_context (dfa, &init_nodes,
+ dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+ &init_nodes,
CONTEXT_NEWLINE
| CONTEXT_BEGBUF);
+ if (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+ || dfa->init_state_begbuf == NULL)
+ return err;
}
else
dfa->init_state_word = dfa->init_state_nl
= dfa->init_state_begbuf = dfa->init_state;
- if (dfa->init_state == NULL || dfa->init_state_word == NULL
- || dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL )
- return REG_ESPACE;
re_node_set_free (&init_nodes);
return REG_NOERROR;
}
@@ -1114,20 +1122,30 @@ calc_epsdest (dfa, node)
}
}
-static int
-duplicate_node (dfa, org_idx, constraint)
+/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
+ The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
+ otherwise return the error code. */
+
+static reg_errcode_t
+duplicate_node (new_idx, dfa, org_idx, constraint)
re_dfa_t *dfa;
- int org_idx;
+ int *new_idx, org_idx;
unsigned int constraint;
{
re_token_t dup;
int dup_idx;
+ reg_errcode_t err;
dup.type = OP_CONTEXT_NODE;
if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE)
{
+ /* If the node whose index is ORG_IDX is the same as the intended
+ node, use it. */
if (dfa->nodes[org_idx].constraint == constraint)
- return org_idx;
+ {
+ *new_idx = org_idx;
+ return REG_NOERROR;
+ }
dup.constraint = constraint |
dfa->nodes[org_idx].constraint;
}
@@ -1137,23 +1155,32 @@ duplicate_node (dfa, org_idx, constraint)
/* In case that `entity' points OP_CONTEXT_NODE,
we correct `entity' to real entity in calc_inveclosures(). */
dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info));
+ dup_idx = re_dfa_add_node (dfa, dup, 1);
+ if (dup.opr.ctx_info == NULL || dup_idx == -1)
+ return REG_ESPACE;
dup.opr.ctx_info->entity = org_idx;
dup.opr.ctx_info->bkref_eclosure = NULL;
- dup_idx = re_dfa_add_node (dfa, dup, 1);
- dfa->nodes[dup_idx].duplicated = 1;
+ dfa->nodes[dup_idx].duplicated = 1;
dfa->firsts[dup_idx] = dfa->firsts[org_idx];
dfa->nexts[dup_idx] = dfa->nexts[org_idx];
- re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
+ err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
+ if (err != REG_NOERROR)
+ return err;
/* Since we don't duplicate epsilon nodes, epsilon closure have
only itself. */
- re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
- re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
+ err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
+ if (err != REG_NOERROR)
+ return err;
+ err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
+ if (err != REG_NOERROR)
+ return err;
/* Then we must update inveclosure for this node.
We process them at last part of calc_eclosure(),
since we don't complete to calculate them here. */
- return dup_idx;
+ *new_idx = dup_idx;
+ return REG_NOERROR;
}
static void
@@ -1193,6 +1220,7 @@ calc_eclosure (dfa)
/* For each nodes, calculate epsilon closure. */
for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx)
{
+ reg_errcode_t err;
re_node_set eclosure_elem;
if (node_idx == max)
{
@@ -1210,7 +1238,9 @@ calc_eclosure (dfa)
if (dfa->eclosures[node_idx].nelem != 0)
continue;
/* Calculate epsilon closure of `node_idx'. */
- eclosure_elem = calc_eclosure_iter (dfa, node_idx, 1);
+ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
+ if (err != REG_NOERROR)
+ return err;
if (dfa->eclosures[node_idx].nelem == 0)
{
@@ -1241,7 +1271,13 @@ calc_eclosure (dfa)
{
int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type))
- dest_node_idx = duplicate_node (dfa, dest_node_idx, constraint);
+ {
+ reg_errcode_t err;
+ err = duplicate_node (&dest_node_idx, dfa, dest_node_idx,
+ constraint);
+ if (err != REG_NOERROR)
+ return err;
+ }
re_node_set_insert (bkref_eclosure, dest_node_idx);
}
dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure;
@@ -1252,15 +1288,19 @@ calc_eclosure (dfa)
/* Calculate epsilon closure of NODE. */
-static re_node_set
-calc_eclosure_iter (dfa, node, root)
+static reg_errcode_t
+calc_eclosure_iter (new_set, dfa, node, root)
+ re_node_set *new_set;
re_dfa_t *dfa;
int node, root;
{
+ reg_errcode_t err;
unsigned int constraint;
int i, max, incomplete = 0;
re_node_set eclosure;
- re_node_set_alloc (&eclosure, 1);
+ err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+ if (err != REG_NOERROR)
+ return err;
/* This indicates that we are calculating this node now.
We reference this value to avoid infinite loop. */
@@ -1285,7 +1325,11 @@ calc_eclosure_iter (dfa, node, root)
/* If we haven't calculated the epsilon closure of `edest' yet,
calculate now. Otherwise use calculated epsilon closure. */
if (dfa->eclosures[edest].nelem == 0)
- eclosure_elem = calc_eclosure_iter (dfa, edest, 0);
+ {
+ err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+ if (err != REG_NOERROR)
+ return err;
+ }
else
eclosure_elem = dfa->eclosures[edest];
/* Merge the epsilon closure of `edest'. */
@@ -1307,7 +1351,11 @@ calc_eclosure_iter (dfa, node, root)
int dest = eclosure.elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[dest].type))
{
- int dup_dest = duplicate_node (dfa, dest, constraint);
+ int dup_dest;
+ reg_errcode_t err;
+ err = duplicate_node (&dup_dest, dfa, dest, constraint);
+ if (err != REG_NOERROR)
+ return err;
if (dest != dup_dest)
{
re_node_set_remove_at (&eclosure, i--);
@@ -1323,7 +1371,8 @@ calc_eclosure_iter (dfa, node, root)
dfa->eclosures[node].nelem = 0;
else
dfa->eclosures[node] = eclosure;
- return eclosure;
+ *new_set = eclosure;
+ return REG_NOERROR;
}
/* Functions for token which are used in the parser. */
@@ -1865,7 +1914,11 @@ parse_expression (regexp, preg, token, syntax, nest, err)
break;
case ANCHOR:
if (dfa->word_char == NULL)
- init_word_char (dfa);
+ {
+ *err = init_word_char (dfa);
+ if (*err != REG_NOERROR)
+ return NULL;
+ }
if (token->opr.ctx_type == WORD_DELIM)
{
bin_tree_t *tree_first, *tree_last;
@@ -2137,28 +2190,6 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
I'm not sure, but maybe enough. */
#define BRACKET_NAME_BUF_SIZE 32
-static inline void *
-extend_array_for_cset (array, num, alloc, type_size)
- void *array;
- int num, *alloc, type_size;
-{
- void *new_array = array;
- if (*alloc == num)
- {
- if (*alloc == 0)
- {
- new_array = malloc (type_size);
- *alloc = 1;
- }
- else
- {
- new_array = realloc (array, type_size * num * 2);
- *alloc = 2 * num;
- }
- }
- return new_array;
-}
-
/* This function parse bracket expression like "[abc]", "[a-c]",
"[[.a-a.]]" etc. */
@@ -2299,22 +2330,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
uint32_t *new_array_end;
int new_nranges;
- /* XXX If mbcset->range_starts and mbcset->range_ends are NULL
- if *range_alloc == 0 then we do not need the if. */
- if (*range_alloc == 0)
- {
- new_nranges = 1;
- new_array_start = re_malloc (uint32_t, 1);
- new_array_end = re_malloc (uint32_t, 1);
- }
- else
- {
- new_nranges = 2 * mbcset->nranges;
- new_array_start = re_realloc (mbcset->range_starts, uint32_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, uint32_t,
- new_nranges);
- }
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ /* Use realloc since mbcset->range_starts and mbcset->range_ends
+ are NULL if *range_alloc == 0. */
+ new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+ new_nranges);
+
if (new_array_start == NULL || new_array_end == NULL)
return REG_ESPACE;
@@ -2394,13 +2418,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
/* Got valid collation sequence, add it as a new entry. */
/* Check the space of the arrays. */
- mbcset->coll_syms = extend_array_for_cset (mbcset->coll_syms,
- mbcset->ncoll_syms,
- coll_sym_alloc,
- sizeof (int32_t));
- if (mbcset->coll_syms == NULL)
- return REG_ESPACE;
-
+ if (*coll_sym_alloc == mbcset->ncoll_syms)
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->ncoll_syms is 0. */
+ *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+ /* Use realloc since mbcset->coll_syms is NULL
+ if *alloc == 0. */
+ mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+ *coll_sym_alloc);
+ if (mbcset->coll_syms == NULL)
+ return REG_ESPACE;
+ }
mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
return REG_NOERROR;
}
@@ -2557,12 +2586,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
bitset_set (sbcset, start_elem.opr.ch);
break;
case MB_CHAR:
- mbcset->mbchars = extend_array_for_cset (mbcset->mbchars,
- mbcset->nmbchars,
- &mbchar_alloc,
- sizeof (wchar_t));
- if (mbcset->mbchars == NULL)
- goto parse_bracket_exp_espace;
+ /* Check whether the array has enough space. */
+ if (mbchar_alloc == mbcset->nmbchars)
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nmbchars is 0. */
+ mbchar_alloc = 2 * mbcset->nmbchars + 1;
+ /* Use realloc since array is NULL if *alloc == 0. */
+ mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
+ mbchar_alloc);
+ if (mbcset->mbchars == NULL)
+ goto parse_bracket_exp_espace;
+ }
mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
break;
case EQUIV_CLASS:
@@ -2779,14 +2814,18 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name)
bitset_set (sbcset, ch);
}
}
- /* Check the space of the arrays, and extend if we need. */
- mbcset->equiv_classes = extend_array_for_cset (mbcset->equiv_classes,
- mbcset->nequiv_classes,
- equiv_class_alloc,
- sizeof (int32_t));
- if (mbcset->equiv_classes == NULL)
- return REG_ESPACE;
-
+ /* Check whether the array has enough space. */
+ if (*equiv_class_alloc == mbcset->nequiv_classes)
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nequiv_classes is 0. */
+ *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+ /* Use realloc since the array is NULL if *alloc == 0. */
+ mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t,
+ *equiv_class_alloc);
+ if (mbcset->equiv_classes == NULL)
+ return REG_ESPACE;
+ }
mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
}
else
@@ -2815,12 +2854,17 @@ build_charclass (mbcset, sbcset, char_class_alloc, name)
int i;
/* Check the space of the arrays. */
- mbcset->char_classes = extend_array_for_cset (mbcset->char_classes,
- mbcset->nchar_classes,
- char_class_alloc,
- sizeof (wctype_t));
- if (mbcset->char_classes == NULL)
- return REG_ESPACE;
+ if (*char_class_alloc == mbcset->nchar_classes)
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nchar_classes is 0. */
+ *char_class_alloc = 2 * mbcset->nchar_classes + 1;
+ /* Use realloc since array is NULL if *alloc == 0. */
+ mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t,
+ *char_class_alloc);
+ if (mbcset->char_classes == NULL)
+ return REG_ESPACE;
+ }
mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 63bed420cd..11726b340d 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -68,6 +68,8 @@ static reg_errcode_t re_string_translate_buffer (re_string_t *pstr,
static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int hash);
+static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
+ unsigned int hash);
static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int hash);
@@ -473,6 +475,10 @@ re_node_set_init_copy (dest, src)
return REG_NOERROR;
}
+/* Calculate the intersection of the sets SRC1 and SRC2. And store it in
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded.
+ Note: We assume dest->elems is NULL, when dest->alloc is 0. */
+
static reg_errcode_t
re_node_set_intersect (dest, src1, src2)
re_node_set *dest;
@@ -483,31 +489,28 @@ re_node_set_intersect (dest, src1, src2)
{
if (src1->nelem + src2->nelem > dest->alloc)
{
- int *new_array;
- if (dest->alloc == 0)
- new_array = re_malloc (int, src1->nelem + src2->nelem);
- else
- new_array = re_realloc (dest->elems, int,
- src1->nelem + src2->nelem);
dest->alloc = src1->nelem + src2->nelem;
- if (new_array == NULL)
+ dest->elems = re_realloc (dest->elems, int, dest->alloc);
+ if (dest->elems == NULL)
return REG_ESPACE;
- dest->elems = new_array;
}
}
else
{
+ /* The intersection of empty sets is also empty set. */
dest->nelem = 0;
return REG_NOERROR;
}
- for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
+ for (i1 = i2 = id = 0; i1 < src1->nelem && i2 < src2->nelem; )
{
if (src1->elems[i1] > src2->elems[i2])
{
++i2;
continue;
}
+ /* The intersection must have the elements which are in both of
+ SRC1 and SRC2. */
if (src1->elems[i1] == src2->elems[i2])
dest->elems[id++] = src2->elems[i2++];
++i1;
@@ -516,6 +519,10 @@ re_node_set_intersect (dest, src1, src2)
return REG_NOERROR;
}
+/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded.
+ Note: We assume dest->elems is NULL, when dest->alloc is 0. */
+
static reg_errcode_t
re_node_set_add_intersect (dest, src1, src2)
re_node_set *dest;
@@ -526,16 +533,10 @@ re_node_set_add_intersect (dest, src1, src2)
{
if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
{
- int *new_array;
- if (dest->alloc == 0)
- new_array = re_malloc (int, src1->nelem + src2->nelem);
- else
- new_array = re_realloc (dest->elems, int,
- src1->nelem + src2->nelem + dest->nelem);
dest->alloc = src1->nelem + src2->nelem + dest->nelem;
- if (new_array == NULL)
+ dest->elems = re_realloc (dest->elems, int, dest->alloc);
+ if (dest->elems == NULL)
return REG_ESPACE;
- dest->elems = new_array;
}
}
else
@@ -567,6 +568,9 @@ re_node_set_add_intersect (dest, src1, src2)
return REG_NOERROR;
}
+/* Calculate the union set of the sets SRC1 and SRC2. And store it to
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
+
static reg_errcode_t
re_node_set_init_union (dest, src1, src2)
re_node_set *dest;
@@ -617,6 +621,9 @@ re_node_set_init_union (dest, src1, src2)
return REG_NOERROR;
}
+/* Calculate the union set of the sets DEST and SRC. And store it to
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
+
static reg_errcode_t
re_node_set_merge (dest, src)
re_node_set *dest;
@@ -628,12 +635,16 @@ re_node_set_merge (dest, src)
else if (dest == NULL)
{
dest = re_malloc (re_node_set, 1);
+ if (dest == NULL)
+ return REG_ESPACE;
return re_node_set_init_copy (dest, src);
}
if (dest->alloc < src->nelem + dest->nelem)
{
dest->alloc = 2 * (src->nelem + dest->alloc);
dest->elems = re_realloc (dest->elems, int, dest->alloc);
+ if (dest->elems == NULL)
+ return REG_ESPACE;
}
for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
@@ -860,18 +871,28 @@ calc_state_hash (nodes, context)
/* Search for the state whose node_set is equivalent to NODES.
Return the pointer to the state, if we found it in the DFA.
- Otherwise create the new one and return it. */
-
-static re_dfastate_t *
-re_acquire_state (dfa, nodes)
+ Otherwise create the new one and return it. In case of an error
+ return NULL and set the error code in ERR.
+ Note: - We assume NULL as the invalid state, then it is possible that
+ return value is NULL and ERR is REG_NOERROR.
+ - We never return non-NULL value in case of any errors, it is for
+ optimization. */
+
+static re_dfastate_t*
+re_acquire_state (err, dfa, nodes)
+ reg_errcode_t *err;
re_dfa_t *dfa;
const re_node_set *nodes;
{
unsigned int hash;
+ re_dfastate_t *new_state;
struct re_state_table_entry *spot;
int i;
if (nodes->nelem == 0)
- return NULL;
+ {
+ *err = REG_NOERROR;
+ return NULL;
+ }
hash = calc_state_hash (nodes, 0);
spot = dfa->state_table + (hash & dfa->state_hash_mask);
@@ -893,25 +914,42 @@ re_acquire_state (dfa, nodes)
}
/* There are no appropriate state in the dfa, create the new one. */
- return create_ci_newstate (dfa, nodes, hash);
+ new_state = create_ci_newstate (dfa, nodes, hash);
+ if (new_state != NULL)
+ return new_state;
+ else
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
}
/* Search for the state whose node_set is equivalent to NODES and
whose context is equivalent to CONTEXT.
Return the pointer to the state, if we found it in the DFA.
- Otherwise create the new one and return it. */
-
-static re_dfastate_t *
-re_acquire_state_context (dfa, nodes, context)
+ Otherwise create the new one and return it. In case of an error
+ return NULL and set the error code in ERR.
+ Note: - We assume NULL as the invalid state, then it is possible that
+ return value is NULL and ERR is REG_NOERROR.
+ - We never return non-NULL value in case of any errors, it is for
+ optimization. */
+
+static re_dfastate_t*
+re_acquire_state_context (err, dfa, nodes, context)
+ reg_errcode_t *err;
re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int context;
{
unsigned int hash;
+ re_dfastate_t *new_state;
struct re_state_table_entry *spot;
int i;
if (nodes->nelem == 0)
- return NULL;
+ {
+ *err = REG_NOERROR;
+ return NULL;
+ }
hash = calc_state_hash (nodes, context);
spot = dfa->state_table + (hash & dfa->state_hash_mask);
@@ -934,9 +972,19 @@ re_acquire_state_context (dfa, nodes, context)
return state;
}
/* There are no appropriate state in `dfa', create the new one. */
- return create_cd_newstate (dfa, nodes, context, hash);
+ new_state = create_cd_newstate (dfa, nodes, context, hash);
+ if (new_state != NULL)
+ return new_state;
+ else
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
}
+/* Allocate memory for DFA state and initialize common properties.
+ Return the new state if succeeded, otherwise return NULL. */
+
static re_dfastate_t *
create_newstate_common (dfa, nodes, hash)
re_dfa_t *dfa;
@@ -945,6 +993,8 @@ create_newstate_common (dfa, nodes, hash)
{
re_dfastate_t *newstate;
newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+ if (newstate == NULL)
+ return NULL;
re_node_set_init_copy (&newstate->nodes, nodes);
newstate->trtable = NULL;
newstate->trtable_search = NULL;
@@ -952,7 +1002,10 @@ create_newstate_common (dfa, nodes, hash)
return newstate;
}
-static void
+/* Store the new state NEWSTATE whose hash value is HASH in appropriate
+ position. Return value indicate the error code if failed. */
+
+static reg_errcode_t
register_state (dfa, newstate, hash)
re_dfa_t *dfa;
re_dfastate_t *newstate;
@@ -972,8 +1025,7 @@ register_state (dfa, newstate, hash)
spot->alloc = 4;
new_array = re_malloc (re_dfastate_t *, spot->alloc);
if (new_array == NULL)
- /* XXX return value */
- return;
+ return REG_ESPACE;
new_array[0] = spot->entry.state;
}
else
@@ -985,8 +1037,12 @@ register_state (dfa, newstate, hash)
spot->entry.array = new_array;
}
spot->entry.array[spot->num++] = newstate;
+ return REG_NOERROR;
}
+/* Create the new state which is independ of contexts.
+ Return the new state if succeeded, otherwise return NULL. */
+
static re_dfastate_t *
create_ci_newstate (dfa, nodes, hash)
re_dfa_t *dfa;
@@ -994,8 +1050,11 @@ create_ci_newstate (dfa, nodes, hash)
unsigned int hash;
{
int i;
+ reg_errcode_t err;
re_dfastate_t *newstate;
newstate = create_newstate_common (dfa, nodes, hash);
+ if (newstate == NULL)
+ return NULL;
newstate->entrance_nodes = &newstate->nodes;
for (i = 0 ; i < nodes->nelem ; i++)
@@ -1021,11 +1080,13 @@ create_ci_newstate (dfa, nodes, hash)
newstate->halt = 1;
}
}
-
- register_state (dfa, newstate, hash);
- return newstate;
+ err = register_state (dfa, newstate, hash);
+ return (err != REG_NOERROR) ? NULL : newstate;
}
+/* Create the new state which is depend on the context CONTEXT.
+ Return the new state if succeeded, otherwise return NULL. */
+
static re_dfastate_t *
create_cd_newstate (dfa, nodes, context, hash)
re_dfa_t *dfa;
@@ -1033,9 +1094,12 @@ create_cd_newstate (dfa, nodes, context, hash)
unsigned int context, hash;
{
int i, nctx_nodes = 0;
+ reg_errcode_t err;
re_dfastate_t *newstate;
newstate = create_newstate_common (dfa, nodes, hash);
+ if (newstate == NULL)
+ return NULL;
newstate->context = context;
newstate->entrance_nodes = &newstate->nodes;
@@ -1076,7 +1140,6 @@ create_cd_newstate (dfa, nodes, context, hash)
{
newstate->entrance_nodes = re_malloc (re_node_set, 1);
if (newstate->entrance_nodes == NULL)
- /* XXX Return which value? */
return NULL;
re_node_set_init_copy (newstate->entrance_nodes, nodes);
nctx_nodes = 0;
@@ -1090,6 +1153,6 @@ create_cd_newstate (dfa, nodes, context, hash)
}
}
}
- register_state (dfa, newstate, hash);
- return newstate;
+ err = register_state (dfa, newstate, hash);
+ return (err != REG_NOERROR) ? NULL : newstate;
}
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 35f9f4a868..38d68d47c0 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -426,9 +426,10 @@ static void re_node_set_remove_at (re_node_set *set, int idx);
#define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems)
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);
-static re_dfastate_t *re_acquire_state (re_dfa_t *dfa,
+static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
const re_node_set *nodes);
-static re_dfastate_t *re_acquire_state_context (re_dfa_t *dfa,
+static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
+ re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int context);
diff --git a/posix/regexec.c b/posix/regexec.c
index cf8f304b48..dc60e50e70 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -38,15 +38,19 @@
#include "regex.h"
#include "regex_internal.h"
-static void match_ctx_init (re_match_context_t *cache, int eflags, int n);
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+ int n);
static void match_ctx_free (re_match_context_t *cache);
-static void match_ctx_add_entry (re_match_context_t *cache, int node, int from,
- int to);
-static int re_search_internal (const regex_t *preg, const char *string,
- int length, int start, int range, size_t nmatch,
- regmatch_t pmatch[], int eflags);
-static inline re_dfastate_t *acquire_init_state_context (const regex_t *preg,
- const re_string_t *input, int idx, int eflags);
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+ int from, int to);
+static reg_errcode_t re_search_internal (const regex_t *preg,
+ const char *string, int length,
+ int start, int range, size_t nmatch,
+ regmatch_t pmatch[], int eflags);
+static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
+ const regex_t *preg,
+ const re_string_t *input,
+ int idx, int eflags);
static int check_matching (const regex_t *preg, re_string_t *input,
re_match_context_t *mctx, re_dfastate_t **state_log,
int start_idx, int fl_search, int fl_longest_match);
@@ -61,9 +65,10 @@ static int proceed_next_node (const regex_t *preg,
const re_match_context_t *mctx,
const re_string_t *input,
int *pidx, int node, re_node_set *eps_via_nodes);
-static void set_regs (const regex_t *preg, re_dfastate_t **state_log,
- const re_match_context_t *mctx, const re_string_t *input,
- size_t nmatch, regmatch_t *pmatch, int last);
+static reg_errcode_t set_regs (const regex_t *preg, re_dfastate_t **state_log,
+ const re_match_context_t *mctx,
+ const re_string_t *input, size_t nmatch,
+ regmatch_t *pmatch, int last);
static int sift_states_iter_mb (const regex_t *preg, re_dfastate_t **state_log,
const re_match_context_t *mctx,
const re_string_t *input, int node_idx,
@@ -73,36 +78,40 @@ static int sift_states_iter_bkref (const re_dfa_t *dfa,
struct re_backref_cache_entry *mctx_entry,
int node_idx, int idx, int match_first,
int match_last);
-static void sift_states_backward (const regex_t *preg,
- re_dfastate_t **state_log,
- const re_match_context_t *mctx,
- const re_string_t *input, int last_node);
-static void add_epsilon_backreference (const re_dfa_t *dfa,
- const re_match_context_t *mctx,
- const re_node_set *plog, int idx,
- re_node_set *state_buf);
-static re_dfastate_t *transit_state (const regex_t *preg, re_dfastate_t *state,
- re_string_t *input, int fl_search,
- re_dfastate_t **state_log,
+static reg_errcode_t sift_states_backward (const regex_t *preg,
+ re_dfastate_t **state_log,
+ const re_match_context_t *mctx,
+ const re_string_t *input,
+ int last_node);
+static reg_errcode_t add_epsilon_backreference (const re_dfa_t *dfa,
+ const re_match_context_t *mctx,
+ const re_node_set *plog,
+ int idx,
+ re_node_set *state_buf);
+static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
+ re_dfastate_t *state, re_string_t *input,
+ int fl_search, re_dfastate_t **state_log,
re_match_context_t *mctx);
-static re_dfastate_t *transit_state_sb (const regex_t *preg,
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
re_dfastate_t *pstate,
re_string_t *input, int fl_search,
re_match_context_t *mctx);
-static void transit_state_mb (const regex_t *preg, re_dfastate_t *pstate,
- const re_string_t *input,
- re_dfastate_t **state_log,
- re_match_context_t *mctx);
-static void transit_state_bkref (const regex_t *preg, re_dfastate_t *pstate,
- const re_string_t *input,
- re_dfastate_t **state_log,
- re_match_context_t *mctx);
-static void transit_state_bkref_loop (const regex_t *preg,
- const re_string_t *input,
- re_node_set *nodes,
- re_dfastate_t **work_state_log,
- re_dfastate_t **state_log,
- re_match_context_t *mctx);
+static reg_errcode_t transit_state_mb (const regex_t *preg,
+ re_dfastate_t *pstate,
+ const re_string_t *input,
+ re_dfastate_t **state_log,
+ re_match_context_t *mctx);
+static reg_errcode_t transit_state_bkref (const regex_t *preg,
+ re_dfastate_t *pstate,
+ const re_string_t *input,
+ re_dfastate_t **state_log,
+ re_match_context_t *mctx);
+static reg_errcode_t transit_state_bkref_loop (const regex_t *preg,
+ const re_string_t *input,
+ re_node_set *nodes,
+ re_dfastate_t **work_state_log,
+ re_dfastate_t **state_log,
+ re_match_context_t *mctx);
static re_dfastate_t **build_trtable (const regex_t *dfa,
const re_dfastate_t *state,
int fl_search);
@@ -141,13 +150,15 @@ regexec (preg, string, nmatch, pmatch, eflags)
regmatch_t pmatch[];
int eflags;
{
+ reg_errcode_t err;
int length = strlen (string);
if (preg->no_sub)
- return re_search_internal (preg, string, length, 0, length, 0,
- NULL, eflags);
+ err = re_search_internal (preg, string, length, 0, length, 0,
+ NULL, eflags);
else
- return re_search_internal (preg, string, length, 0, length, nmatch,
- pmatch, eflags);
+ err = re_search_internal (preg, string, length, 0, length, nmatch,
+ pmatch, eflags);
+ return err != REG_NOERROR;
}
#ifdef _LIBC
weak_alias (__regexec, regexec)
@@ -164,7 +175,8 @@ re_match (buffer, string, length, start, regs)
int length, start;
struct re_registers *regs;
{
- int i, nregs, result, rval, eflags = 0;
+ reg_errcode_t result;
+ int i, nregs, rval, eflags = 0;
regmatch_t *pmatch;
eflags |= (buffer->not_bol) ? REG_NOTBOL : 0;
@@ -238,7 +250,7 @@ re_match (buffer, string, length, start, regs)
}
}
/* Return value is -1 if not match, the length of mathing otherwise. */
- rval = (result) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so;
+ rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so;
re_free (pmatch);
return rval;
}
@@ -290,7 +302,8 @@ re_search (bufp, string, size, startpos, range, regs)
int size, startpos, range;
struct re_registers *regs;
{
- int i, nregs, result, real_range, rval, eflags = 0;
+ reg_errcode_t result;
+ int i, nregs, real_range, rval, eflags = 0;
regmatch_t *pmatch;
eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
@@ -376,7 +389,7 @@ re_search (bufp, string, size, startpos, range, regs)
}
/* Return value is -1 if not match, the position where the mathing starts
otherwise. */
- rval = (result) ? -1 : pmatch[0].rm_so;
+ rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_so;
re_free (pmatch);
return rval;
}
@@ -486,11 +499,12 @@ static re_node_set empty_set;
length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
mingings with regexec. START, and RANGE have the same meanings
with re_search.
- Return 0 if we find a match and REG_NOMATCH if not.
+ Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+ otherwise return the error code.
Note: We assume front end functions already check ranges.
(START + RANGE >= 0 && START + RANGE <= LENGTH) */
-static int
+static reg_errcode_t
re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
const regex_t *preg;
const char *string;
@@ -498,6 +512,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
size_t nmatch;
regmatch_t pmatch[];
{
+ reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
re_string_t input;
re_dfastate_t **state_log;
@@ -510,7 +525,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
if (preg->used == 0 || dfa->init_state == NULL
|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL
|| dfa->init_state_begbuf == NULL)
- return 1;
+ return REG_NOMATCH;
re_node_set_init_empty (&empty_set);
@@ -522,16 +537,24 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
back-reference or a node which can accept multibyte character or
multi character collating element. */
if (nmatch > 1 || dfa->has_mb_node)
- state_log = re_malloc (re_dfastate_t *, length + 1);
+ {
+ state_log = re_malloc (re_dfastate_t *, length + 1);
+ if (state_log == NULL)
+ return REG_ESPACE;
+ }
else
state_log = NULL;
if (preg->syntax & RE_ICASE)
- re_string_construct_toupper (&input, string, length, preg->translate);
+ err = re_string_construct_toupper (&input, string, length, preg->translate);
else
- re_string_construct (&input, string, length, preg->translate);
+ err = re_string_construct (&input, string, length, preg->translate);
+ if (err != REG_NOERROR)
+ return err;
- match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+ err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+ if (err != REG_NOERROR)
+ return err;
#ifdef DEBUG
/* We assume front-end functions already check them. */
@@ -557,7 +580,12 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
match_last = check_matching (preg, &input, &mctx, state_log,
match_first, 0, fl_longest_match);
if (match_last != -1)
- break;
+ {
+ if (match_last == -2)
+ return REG_ESPACE;
+ else
+ break; /* We found a matching. */
+ }
}
}
/* Update counter. */
@@ -598,8 +626,13 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
#endif
halt_node = check_halt_state_context (preg, pstate, &input,
match_last, eflags);
- sift_states_backward (preg, state_log, &mctx, &input, halt_node);
- set_regs (preg, state_log, &mctx, &input, nmatch, pmatch, halt_node);
+ err = sift_states_backward (preg, state_log, &mctx, &input, halt_node);
+ if (err != REG_NOERROR)
+ return err;
+ err = set_regs (preg, state_log, &mctx, &input, nmatch, pmatch,
+ halt_node);
+ if (err != REG_NOERROR)
+ return err;
}
}
@@ -607,21 +640,23 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
if (dfa->nbackref)
match_ctx_free (&mctx);
re_string_destruct (&input);
- return match_last == -1;
+ return (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
}
-/* Acquire an initial state.
+/* Acquire an initial state and return it.
We must select appropriate initial state depending on the context,
since initial states may have constraints like "\<", "^", etc.. */
static inline re_dfastate_t *
-acquire_init_state_context (preg, input, idx, eflags)
- const regex_t *preg;
- const re_string_t *input;
- int idx, eflags;
+acquire_init_state_context (err, preg, input, idx, eflags)
+ reg_errcode_t *err;
+ const regex_t *preg;
+ const re_string_t *input;
+ int idx, eflags;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ *err = REG_NOERROR;
if (dfa->init_state->has_constraint)
{
unsigned int context;
@@ -636,9 +671,12 @@ acquire_init_state_context (preg, input, idx, eflags)
else if (IS_NEWLINE_CONTEXT (context))
return dfa->init_state_nl;
else if (IS_BEGBUF_CONTEXT (context))
- /* It is relatively rare case, then calculate on demand. */
- return re_acquire_state_context (dfa, dfa->init_state->entrance_nodes,
- context);
+ {
+ /* It is relatively rare case, then calculate on demand. */
+ return re_acquire_state_context (err, dfa,
+ dfa->init_state->entrance_nodes,
+ context);
+ }
else
/* Must not happen? */
return dfa->init_state;
@@ -648,7 +686,8 @@ acquire_init_state_context (preg, input, idx, eflags)
}
/* Check whether the regular expression match input string INPUT or not,
- and return the index where the matching end, or return -1 if not match.
+ and return the index where the matching end, return -1 if not match,
+ or return -2 in case of an error.
FL_SEARCH means we must search where the matching starts,
FL_LONGEST_MATCH means we want the POSIX longest matching. */
@@ -661,11 +700,15 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
re_dfastate_t **state_log;
int start_idx, fl_search, fl_longest_match;
{
+ reg_errcode_t err;
int match = 0, match_last = -1;
re_dfastate_t *cur_state;
- cur_state = acquire_init_state_context (preg, input, start_idx,
+ cur_state = acquire_init_state_context (&err, preg, input, start_idx,
mctx->eflags);
+ /* An initial state must not be NULL(invalid state). */
+ if (cur_state == NULL)
+ return -2;
if (state_log != NULL)
state_log[start_idx] = cur_state;
/* If the RE accepts NULL string. */
@@ -687,11 +730,13 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
while (!re_string_eoi (input))
{
- cur_state = transit_state (preg, cur_state, input, fl_search && !match,
- state_log, mctx);
- if (cur_state == NULL) /* Reached at the invalid state. */
+ cur_state = transit_state (&err, preg, cur_state, input,
+ fl_search && !match, state_log, mctx);
+ if (cur_state == NULL) /* Reached at the invalid state or an error. */
{
int cur_str_idx = re_string_cur_idx (input);
+ if (err != REG_NOERROR)
+ return -2;
if (fl_search && !match)
{
/* Restart from initial state, since we are searching
@@ -699,9 +744,11 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
#ifdef RE_ENABLE_I18N
if (MB_CUR_MAX == 1 || re_string_first_byte (input, cur_str_idx))
#endif /* RE_ENABLE_I18N */
- cur_state = acquire_init_state_context (preg, input,
+ cur_state = acquire_init_state_context (&err, preg, input,
cur_str_idx,
mctx->eflags);
+ if (cur_state == NULL && err != REG_NOERROR)
+ return -2;
if (state_log != NULL)
state_log[cur_str_idx] = cur_state;
}
@@ -787,9 +834,10 @@ check_halt_state_context (preg, state, input, idx, eflags)
return 0;
}
-/* Compute the next node to which "NFA" transit from NODE.
- Return the destination node, and update EPS_VIA_NODES.
- ("NFA" is a NFA corresponding to the DFA. */
+/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
+ corresponding to the DFA).
+ Return the destination node, and update EPS_VIA_NODES, return -1 in case
+ of errors. */
static int
proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
@@ -801,10 +849,12 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
re_node_set *eps_via_nodes;
{
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
- int i, dest_node = -1;
+ int i, dest_node = -1, err;
if (IS_EPSILON_NODE (dfa->nodes[node].type))
{
- re_node_set_insert (eps_via_nodes, node);
+ err = re_node_set_insert (eps_via_nodes, node);
+ if (err < 0)
+ return -1;
for (i = 0; i < state_log[*pidx]->nodes.nelem; ++i)
{
int candidate = state_log[*pidx]->nodes.elems[i];
@@ -845,7 +895,9 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
}
if (naccepted == 0)
{
- re_node_set_insert (eps_via_nodes, node);
+ err = re_node_set_insert (eps_via_nodes, node);
+ if (err < 0)
+ return -1;
dest_node = dfa->nexts[node];
if (re_node_set_contains (&state_log[*pidx]->nodes, dest_node))
return dest_node;
@@ -885,7 +937,7 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
Note: We assume that pmatch[0] is already set, and
pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
-static void
+static reg_errcode_t
set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
const regex_t *preg;
re_dfastate_t **state_log;
@@ -944,9 +996,11 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
/* Proceed to next node. */
cur_node = proceed_next_node (preg, state_log, mctx, input, &idx,
cur_node, &eps_via_nodes);
+ if (cur_node < 0)
+ return REG_ESPACE;
}
re_node_set_free (&eps_via_nodes);
- return;
+ return REG_NOERROR;
}
#define NUMBER_OF_STATE 1
@@ -974,7 +1028,7 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
#define STATE_NODE_CONTAINS(state,node) \
((state) != NULL && re_node_set_contains (&(state)->nodes, node))
-static void
+static reg_errcode_t
sift_states_backward (preg, state_log, mctx, input, last_node)
const regex_t *preg;
re_dfastate_t **state_log;
@@ -982,6 +1036,7 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
const re_string_t *input;
int last_node;
{
+ reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
re_node_set state_buf;
int str_idx = mctx->match_last;
@@ -990,18 +1045,28 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
#ifdef DEBUG
assert (state_log != NULL && state_log[str_idx] != NULL);
#endif
- re_node_set_alloc (&state_buf, NUMBER_OF_STATE);
+ err = re_node_set_alloc (&state_buf, NUMBER_OF_STATE);
+ if (err != REG_NOERROR)
+ return err;
plog = &state_log[str_idx]->nodes;
/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
transit to the last_node and the last_node itself. */
- re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node);
+ err = re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node);
+ if (err != REG_NOERROR)
+ return err;
if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref)
- add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
+ {
+ err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
+ if (err != REG_NOERROR)
+ return err;
+ }
/* Update state log. */
- state_log[str_idx] = re_acquire_state (dfa, &state_buf);
+ state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
+ if (state_log[str_idx] == NULL && err != REG_NOERROR)
+ return err;
/* Then check each states in the state_log. */
while (str_idx > mctx->match_first)
@@ -1062,17 +1127,26 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
/* `prev_node' may point the entity of the OP_CONTEXT_NODE,
then we use plog->elems[i] instead. */
- re_node_set_add_intersect (&state_buf, plog,
- dfa->inveclosures + prev_node);
+ err = re_node_set_add_intersect (&state_buf, plog,
+ dfa->inveclosures + prev_node);
+ if (err != REG_NOERROR)
+ return err;
}
if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref)
- add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
+ {
+ err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
+ if (err != REG_NOERROR)
+ return err;
+ }
/* Update state_log. */
- state_log[str_idx] = re_acquire_state (dfa, &state_buf);
+ state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
+ if (state_log[str_idx] == NULL && err != REG_NOERROR)
+ return err;
}
re_node_set_free (&state_buf);
+ return REG_NOERROR;
}
/* Helper functions. */
@@ -1136,7 +1210,7 @@ sift_states_iter_bkref (dfa, state_log, mctx_entry, node_idx, idx, match_first,
return naccepted;
}
-static void
+static reg_errcode_t
add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
const re_dfa_t *dfa;
const re_match_context_t *mctx;
@@ -1164,12 +1238,16 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
}
if (j < mctx->nbkref_ents || idx == mctx->match_first)
{
- re_node_set_add_intersect (state_buf, plog,
- dfa->inveclosures + node_idx);
+ reg_errcode_t err;
+ err = re_node_set_add_intersect (state_buf, plog,
+ dfa->inveclosures + node_idx);
+ if (err != REG_NOERROR)
+ return err;
i = 0;
}
}
}
+ return REG_NOERROR;
}
/* Functions for state transition. */
@@ -1180,17 +1258,19 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
update the destination of STATE_LOG. */
static re_dfastate_t *
-transit_state (preg, state, input, fl_search, state_log, mctx)
- const regex_t *preg;
- re_dfastate_t *state, **state_log;
- re_string_t *input;
- int fl_search;
- re_match_context_t *mctx;
+transit_state (err, preg, state, input, fl_search, state_log, mctx)
+ reg_errcode_t *err;
+ const regex_t *preg;
+ re_dfastate_t *state, **state_log;
+ re_string_t *input;
+ int fl_search;
+ re_match_context_t *mctx;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfastate_t **trtable, *next_state;
unsigned char ch;
+ *err = REG_NOERROR;
if (state == NULL)
{
next_state = state;
@@ -1200,7 +1280,11 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
{
/* If the current state can accept multibyte. */
if (state->accept_mb)
- transit_state_mb (preg, state, input, state_log, mctx);
+ {
+ *err = transit_state_mb (preg, state, input, state_log, mctx);
+ if (*err != REG_NOERROR)
+ return NULL;
+ }
/* Then decide the next state with the single byte. */
if (1)
@@ -1221,7 +1305,10 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
else
{
/* don't use transition table */
- next_state = transit_state_sb (preg, state, input, fl_search, mctx);
+ next_state = transit_state_sb (err, preg, state, input, fl_search,
+ mctx);
+ if (next_state == NULL && err != REG_NOERROR)
+ return NULL;
}
}
@@ -1252,7 +1339,10 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
if (next_state != NULL)
{
table_nodes = next_state->entrance_nodes;
- re_node_set_init_union (&next_nodes, table_nodes, log_nodes);
+ *err = re_node_set_init_union (&next_nodes, table_nodes,
+ log_nodes);
+ if (*err != REG_NOERROR)
+ return NULL;
}
else
next_nodes = *log_nodes;
@@ -1262,14 +1352,19 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
context = re_string_context_at (input, re_string_cur_idx (input) - 1,
mctx->eflags, preg->newline_anchor);
next_state = state_log[cur_idx]
- = re_acquire_state_context (dfa, &next_nodes, context);
+ = re_acquire_state_context (err, dfa, &next_nodes, context);
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+
if (table_nodes != NULL)
re_node_set_free (&next_nodes);
}
/* If the next state has back references. */
if (next_state != NULL && next_state->has_backref)
{
- transit_state_bkref (preg, next_state, input, state_log, mctx);
+ *err = transit_state_bkref (preg, next_state, input, state_log, mctx);
+ if (*err != REG_NOERROR)
+ return NULL;
next_state = state_log[cur_idx];
}
}
@@ -1282,12 +1377,13 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
accepting the current input byte. */
static re_dfastate_t *
-transit_state_sb (preg, state, input, fl_search, mctx)
- const regex_t *preg;
- re_dfastate_t *state;
- re_string_t *input;
- int fl_search;
- re_match_context_t *mctx;
+transit_state_sb (err, preg, state, input, fl_search, mctx)
+ reg_errcode_t *err;
+ const regex_t *preg;
+ re_dfastate_t *state;
+ re_string_t *input;
+ int fl_search;
+ re_match_context_t *mctx;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_node_set next_nodes;
@@ -1295,14 +1391,20 @@ transit_state_sb (preg, state, input, fl_search, mctx)
int node_cnt, cur_str_idx = re_string_cur_idx (input);
unsigned int context;
- re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+ *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+ if (*err != REG_NOERROR)
+ return NULL;
for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
{
int cur_node = state->nodes.elems[node_cnt];
if (check_node_accept (preg, dfa->nodes + cur_node, input,
cur_str_idx, mctx->eflags))
- re_node_set_merge (&next_nodes,
- dfa->eclosures + dfa->nexts[cur_node]);
+ {
+ *err = re_node_set_merge (&next_nodes,
+ dfa->eclosures + dfa->nexts[cur_node]);
+ if (*err != REG_NOERROR)
+ return NULL;
+ }
}
if (fl_search)
{
@@ -1317,23 +1419,32 @@ transit_state_sb (preg, state, input, fl_search, mctx)
}
if (!not_initial)
#endif
- re_node_set_merge (&next_nodes, dfa->init_state->entrance_nodes);
+ {
+ *err = re_node_set_merge (&next_nodes,
+ dfa->init_state->entrance_nodes);
+ if (*err != REG_NOERROR)
+ return NULL;
+ }
}
context = re_string_context_at (input, cur_str_idx, mctx->eflags,
preg->newline_anchor);
- next_state = re_acquire_state_context (dfa, &next_nodes, context);
+ next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+
re_node_set_free (&next_nodes);
re_string_skip_bytes (input, 1);
return next_state;
}
-static void
+static reg_errcode_t
transit_state_mb (preg, pstate, input, state_log, mctx)
const regex_t *preg;
re_dfastate_t *pstate, **state_log;
const re_string_t *input;
re_match_context_t *mctx;
{
+ reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i;
@@ -1376,39 +1487,50 @@ transit_state_mb (preg, pstate, input, state_log, mctx)
if (dest_state == NULL)
dest_nodes = *new_nodes;
else
- re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
- new_nodes);
+ {
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes, new_nodes);
+ if (err != REG_NOERROR)
+ return err;
+ }
context = re_string_context_at (input, dest_idx - 1, mctx->eflags,
preg->newline_anchor);
- state_log[dest_idx] = re_acquire_state_context (dfa, &dest_nodes, context);
+ state_log[dest_idx] = re_acquire_state_context (&err, dfa, &dest_nodes,
+ context);
+ if (state_log[dest_idx] == NULL && err != REG_NOERROR)
+ return err;
if (dest_state != NULL)
re_node_set_free (&dest_nodes);
}
+ return REG_NOERROR;
}
-static void
+static reg_errcode_t
transit_state_bkref (preg, pstate, input, state_log, mctx)
const regex_t *preg;
re_dfastate_t *pstate, **state_log;
const re_string_t *input;
re_match_context_t *mctx;
{
+ reg_errcode_t err;
re_dfastate_t **work_state_log;
#ifdef DEBUG
assert (mctx->match_first != -1);
#endif
work_state_log = re_malloc (re_dfastate_t *, re_string_cur_idx (input) + 1);
+ if (work_state_log == NULL)
+ return REG_ESPACE;
- transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log,
- state_log, mctx);
-
+ err = transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log,
+ state_log, mctx);
re_free (work_state_log);
+ return err;
}
/* Caller must allocate `work_state_log'. */
-static void
+static reg_errcode_t
transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
const regex_t *preg;
const re_string_t *input;
@@ -1416,10 +1538,13 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
re_dfastate_t **work_state_log, **state_log;
re_match_context_t *mctx;
{
+ reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j;
regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1);
int cur_str_idx = re_string_cur_idx (input);
+ if (cur_regs == NULL)
+ return REG_ESPACE;
for (i = 0; i < nodes->nelem; ++i)
{
@@ -1474,7 +1599,9 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
/* Successfully matched, add a new cache entry. */
dest_str_idx = cur_str_idx + subexp_len;
- match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx);
+ err = match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx);
+ if (err != REG_NOERROR)
+ return err;
clean_state_log_if_need (state_log, mctx, dest_str_idx);
/* And add the epsilon closures (which is `new_dest_nodes') of
@@ -1494,29 +1621,44 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
: state_log[cur_str_idx]->nodes.nelem);
/* Add `new_dest_node' to state_log. */
if (dest_state == NULL)
- state_log[dest_str_idx] = re_acquire_state_context (dfa,
- new_dest_nodes,
- context);
+ {
+ state_log[dest_str_idx] = re_acquire_state_context (&err, dfa,
+ new_dest_nodes,
+ context);
+ if (state_log[dest_str_idx] == NULL && err != REG_NOERROR)
+ return err;
+ }
else
{
re_node_set dest_nodes;
- re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
- new_dest_nodes);
- state_log[dest_str_idx] = re_acquire_state_context (dfa, &dest_nodes,
+ err = re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
+ new_dest_nodes);
+ if (err != REG_NOERROR)
+ return err;
+ state_log[dest_str_idx] = re_acquire_state_context (&err, dfa,
+ &dest_nodes,
context);
+ if (state_log[dest_str_idx] == NULL && err != REG_NOERROR)
+ return err;
re_node_set_free (&dest_nodes);
}
/* We need to check recursively if the backreference can epsilon
transit. */
if (subexp_len == 0 && state_log[cur_str_idx]->nodes.nelem > prev_nelem)
- transit_state_bkref_loop (preg, input, new_dest_nodes, work_state_log,
- state_log, mctx);
+ {
+ err = transit_state_bkref_loop (preg, input, new_dest_nodes,
+ work_state_log, state_log, mctx);
+ if (err != REG_NOERROR)
+ return err;
+ }
}
re_free (cur_regs);
+ return REG_NOERROR;
}
-/* Build transition table for the state. */
+/* Build transition table for the state.
+ Return the new table if succeeded, otherwise return NULL. */
static re_dfastate_t **
build_trtable (preg, state, fl_search)
@@ -1524,6 +1666,7 @@ build_trtable (preg, state, fl_search)
const re_dfastate_t *state;
int fl_search;
{
+ reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j, k, ch;
int ndests; /* Number of the destination states from `state'. */
@@ -1541,15 +1684,18 @@ build_trtable (preg, state, fl_search)
/* Initialize transiton table. */
trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ if (dests_node == NULL || dests_ch == NULL || trtable == NULL)
+ return NULL;
/* At first, group all nodes belonging to `state' into several
destinations. */
ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
- if (ndests == 0)
+ if (ndests <= 0)
{
re_free (dests_node);
re_free (dests_ch);
- return trtable;
+ /* Return NULL in case of an error, trtable otherwise. */
+ return (ndests < 0) ? NULL : trtable;
}
dest_states = re_malloc (re_dfastate_t *, ndests);
@@ -1557,7 +1703,11 @@ build_trtable (preg, state, fl_search)
dest_states_nl = re_malloc (re_dfastate_t *, ndests);
bitset_empty (acceptable);
- re_node_set_alloc (&follows, ndests + 1);
+ err = re_node_set_alloc (&follows, ndests + 1);
+ if (dest_states == NULL || dest_states_word == NULL || dest_states_nl == NULL
+ || err != REG_NOERROR)
+ return NULL;
+
/* Then build the states for all destinations. */
for (i = 0; i < ndests; ++i)
{
@@ -1569,7 +1719,9 @@ build_trtable (preg, state, fl_search)
next_node = dfa->nexts[dests_node[i].elems[j]];
if (next_node != -1)
{
- re_node_set_merge (&follows, dfa->eclosures + next_node);
+ err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+ if (err != REG_NOERROR)
+ return NULL;
}
}
/* If search flag is set, merge the initial state. */
@@ -1585,17 +1737,28 @@ build_trtable (preg, state, fl_search)
}
if (!not_initial)
#endif
- re_node_set_merge (&follows, dfa->init_state->entrance_nodes);
+ {
+ err = re_node_set_merge (&follows,
+ dfa->init_state->entrance_nodes);
+ if (err != REG_NOERROR)
+ return NULL;
+ }
}
- dest_states[i] = re_acquire_state_context (dfa, &follows, 0);
+ dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+ if (dest_states[i] == NULL && err != REG_NOERROR)
+ return NULL;
/* If the new state has context constraint,
build appropriate states for these contexts. */
if (dest_states[i]->has_constraint)
{
- dest_states_word[i] = re_acquire_state_context (dfa, &follows,
+ dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
CONTEXT_WORD);
- dest_states_nl[i] = re_acquire_state_context (dfa, &follows,
+ if (dest_states_word[i] == NULL && err != REG_NOERROR)
+ return NULL;
+ dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
CONTEXT_NEWLINE);
+ if (dest_states_nl[i] == NULL && err != REG_NOERROR)
+ return NULL;
}
else
{
@@ -1654,6 +1817,7 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
re_node_set *dests_node;
bitset *dests_ch;
{
+ reg_errcode_t err;
const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j, k;
int ndests; /* Number of the destinations from `state'. */
@@ -1750,12 +1914,16 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
{
bitset_copy (dests_ch[ndests], remains);
bitset_copy (dests_ch[j], intersec);
- re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+ err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+ if (err != REG_NOERROR)
+ return -1;
++ndests;
}
/* Put the position in the current group. */
- re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+ err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+ if (err < 0)
+ return -1;
/* If all characters are consumed, go to next node. */
if (!not_consumed)
@@ -1765,7 +1933,9 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
if (j == ndests)
{
bitset_copy (dests_ch[ndests], accepts);
- re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+ err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+ if (err != REG_NOERROR)
+ return -1;
++ndests;
bitset_empty (accepts);
}
@@ -2028,7 +2198,7 @@ check_node_accept (preg, node, input, idx, eflags)
/* Functions for matching context. */
-static void
+static reg_errcode_t
match_ctx_init (mctx, eflags, n)
re_match_context_t *mctx;
int eflags;
@@ -2037,12 +2207,17 @@ match_ctx_init (mctx, eflags, n)
mctx->eflags = eflags;
mctx->match_first = mctx->match_last = -1;
if (n > 0)
- mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
+ {
+ mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
+ if (mctx->bkref_ents == NULL)
+ return REG_ESPACE;
+ }
else
mctx->bkref_ents = NULL;
mctx->nbkref_ents = 0;
mctx->abkref_ents = n;
mctx->max_bkref_len = 0;
+ return REG_NOERROR;
}
static void
@@ -2054,7 +2229,7 @@ match_ctx_free (mctx)
/* Add a new backreference entry to the cache. */
-static void
+static reg_errcode_t
match_ctx_add_entry (mctx, node, from, to)
re_match_context_t *mctx;
int node, from, to;
@@ -2064,6 +2239,8 @@ match_ctx_add_entry (mctx, node, from, to)
mctx->bkref_ents = re_realloc (mctx->bkref_ents,
struct re_backref_cache_entry,
mctx->abkref_ents * 2);
+ if (mctx->bkref_ents == NULL)
+ return REG_ESPACE;
memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
mctx->abkref_ents *= 2;
@@ -2073,4 +2250,5 @@ match_ctx_add_entry (mctx, node, from, to)
mctx->bkref_ents[mctx->nbkref_ents++].to = to;
if (mctx->max_bkref_len < to - from)
mctx->max_bkref_len = to - from;
+ return REG_NOERROR;
}