aboutsummaryrefslogtreecommitdiff
path: root/posix/regexec.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2002-04-24 21:54:53 +0000
committerUlrich Drepper <drepper@redhat.com>2002-04-24 21:54:53 +0000
commit612546c60dd28d7af44fbb2bc98c69c33b4a0c49 (patch)
tree11dc6bf94bd7beb3271366b33a86be162b3bfa6e /posix/regexec.c
parentbe479a6dfe81c5b426e2cbabd62be2c042ab2d45 (diff)
downloadglibc-612546c60dd28d7af44fbb2bc98c69c33b4a0c49.tar
glibc-612546c60dd28d7af44fbb2bc98c69c33b4a0c49.tar.gz
glibc-612546c60dd28d7af44fbb2bc98c69c33b4a0c49.tar.bz2
glibc-612546c60dd28d7af44fbb2bc98c69c33b4a0c49.zip
Update.
2002-04-22 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c (re_compile_internal): Adapt it to new interface of buffer building functions. * posix/regex_internal.c (re_string_allocate): New function. (re_string_realloc_buffers): New function. (re_string_skip_chars): New function. (re_string_reconstruct): New function. (re_string_construct): Adapt it to new interface of buffer building functions. (re_string_construct_common): Likewise. (build_wcs_buffer): Likewise. (build_wcs_upper_buffer): Likewise. (build_upper_buffer): Likewise. (re_string_translate_buffer): Likewise. (re_string_context_at): Adapt it to variable length buffers. * posix/regex_internal.h (re_string_t): Add new fields to handle variable length buffers. (re_match_context_t): Likewise. * posix/regexec.c (re_search_internal): Adapt it to new interface of re_string_t and re_match_context_t. (acquire_init_state_context): Likewise. (check_matching): Likewise. (check_halt_state_context): Likewise. (proceed_next_node): Likewise. (set_regs): Likewise. (sift_states_backward): Likewise. (clean_state_log_if_need): Likewise. (sift_states_iter_mb): Likewise. (sift_states_iter_bkref): Likewise. (add_epsilon_backreference): Likewise. (transit_state): Likewise. (transit_state_sb): Likewise. (transit_state_mb): Likewise. (transit_state_bkref): Likewise. (transit_state_bkref_loop): Likewise. (check_node_accept): Likewise. (match_ctx_init): Likewise. (extend_buffers): New function. 2002-04-21 Bruno Haible <bruno@clisp.org> * iconvdata/tst-table.sh: For the second check, use the truncated GB18030 charmap table, like for the first check.
Diffstat (limited to 'posix/regexec.c')
-rw-r--r--posix/regexec.c631
1 files changed, 362 insertions, 269 deletions
diff --git a/posix/regexec.c b/posix/regexec.c
index a069d7d3af..e888970936 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -39,7 +39,7 @@
#include "regex_internal.h"
static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
- int n);
+ re_string_t *input, int n);
static void match_ctx_free (re_match_context_t *cache);
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
int from, int to);
@@ -49,68 +49,54 @@ static reg_errcode_t re_search_internal (const regex_t *preg,
regmatch_t pmatch[], int eflags);
static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
const regex_t *preg,
- const re_string_t *input,
- int idx, int eflags);
-static int check_matching (const regex_t *preg, re_string_t *input,
- re_match_context_t *mctx, re_dfastate_t **state_log,
- int start_idx, int fl_search, int fl_longest_match);
+ const re_match_context_t *mctx,
+ int idx);
+static int check_matching (const regex_t *preg, re_match_context_t *mctx,
+ int fl_search, int fl_longest_match);
static int check_halt_node_context (const re_dfa_t *dfa, int node,
unsigned int context);
static int check_halt_state_context (const regex_t *preg,
const re_dfastate_t *state,
- const re_string_t *input, int idx,
- int eflags);
+ const re_match_context_t *mctx, int idx);
static int proceed_next_node (const regex_t *preg,
- re_dfastate_t **state_log,
const re_match_context_t *mctx,
- const re_string_t *input,
int *pidx, int node, re_node_set *eps_via_nodes);
-static reg_errcode_t set_regs (const regex_t *preg, re_dfastate_t **state_log,
+static reg_errcode_t set_regs (const regex_t *preg,
const re_match_context_t *mctx,
- const re_string_t *input, size_t nmatch,
- regmatch_t *pmatch, int last);
-static int sift_states_iter_mb (const regex_t *preg, re_dfastate_t **state_log,
+ size_t nmatch, regmatch_t *pmatch, int last);
+static int sift_states_iter_mb (const regex_t *preg,
const re_match_context_t *mctx,
- const re_string_t *input, int node_idx,
- int str_idx, int max_str_idx);
+ int node_idx, int str_idx, int max_str_idx);
static int sift_states_iter_bkref (const re_dfa_t *dfa,
re_dfastate_t **state_log,
struct re_backref_cache_entry *mctx_entry,
- int node_idx, int idx, int match_first,
- int match_last);
+ int node_idx, int idx, int match_last);
static reg_errcode_t sift_states_backward (const regex_t *preg,
- re_dfastate_t **state_log,
const re_match_context_t *mctx,
- const re_string_t *input,
int last_node);
+static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
+ int next_state_log_idx);
static reg_errcode_t add_epsilon_backreference (const re_dfa_t *dfa,
const re_match_context_t *mctx,
const re_node_set *plog,
int idx,
re_node_set *state_buf);
static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
- re_dfastate_t *state, re_string_t *input,
- int fl_search, re_dfastate_t **state_log,
- re_match_context_t *mctx);
+ re_match_context_t *mctx,
+ re_dfastate_t *state, int fl_search);
static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
re_dfastate_t *pstate,
- re_string_t *input, int fl_search,
+ int fl_search,
re_match_context_t *mctx);
static reg_errcode_t transit_state_mb (const regex_t *preg,
re_dfastate_t *pstate,
- const re_string_t *input,
- re_dfastate_t **state_log,
re_match_context_t *mctx);
static reg_errcode_t transit_state_bkref (const regex_t *preg,
re_dfastate_t *pstate,
- const re_string_t *input,
- re_dfastate_t **state_log,
re_match_context_t *mctx);
static reg_errcode_t transit_state_bkref_loop (const regex_t *preg,
- const re_string_t *input,
re_node_set *nodes,
re_dfastate_t **work_state_log,
- re_dfastate_t **state_log,
re_match_context_t *mctx);
static re_dfastate_t **build_trtable (const regex_t *dfa,
const re_dfastate_t *state,
@@ -124,7 +110,8 @@ static int group_nodes_into_DFAstates (const regex_t *dfa,
re_node_set *states_node,
bitset *states_ch);
static int check_node_accept (const regex_t *preg, const re_token_t *node,
- const re_string_t *input, int idx, int eflags);
+ const re_match_context_t *mctx, int idx);
+static reg_errcode_t extend_buffers (re_match_context_t *mctx);
/* Entry point for POSIX code. */
@@ -523,7 +510,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
re_string_t input;
- re_dfastate_t **state_log;
+ int left_lim, right_lim, incr;
int fl_longest_match, match_first, match_last = -1;
re_match_context_t mctx;
char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate)
@@ -540,53 +527,91 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
/* We must check the longest matching, if nmatch > 0. */
fl_longest_match = (nmatch != 0);
+ err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
+ preg->translate, preg->syntax & RE_ICASE);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
/* We will log all the DFA states through which the dfa pass,
if nmatch > 1, or this dfa has "multibyte node", which is a
back-reference or a node which can accept multibyte character or
multi character collating element. */
if (nmatch > 1 || dfa->has_mb_node)
{
- state_log = re_malloc (re_dfastate_t *, length + 1);
- if (BE (state_log == NULL, 0))
+ mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
+ if (BE (mctx.state_log == NULL, 0))
return REG_ESPACE;
}
else
- state_log = NULL;
-
- if (preg->syntax & RE_ICASE)
- err = re_string_construct_toupper (&input, string, length, preg->translate);
- else
- err = re_string_construct (&input, string, length, preg->translate);
- if (BE (err != REG_NOERROR, 0))
- return err;
-
- err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
- if (BE (err != REG_NOERROR, 0))
- return err;
+ mctx.state_log = NULL;
#ifdef DEBUG
/* We assume front-end functions already check them. */
assert (start + range >= 0 && start + range <= length);
#endif
+ match_first = start;
+ input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+
/* Check incrementally whether of not the input string match. */
- for (match_first = start; ;)
+ incr = (range < 0) ? -1 : 1;
+ left_lim = (range < 0) ? start + range : start;
+ right_lim = (range < 0) ? start : start + range;
+
+ for (;;)
{
- if ((match_first < length
- && (fastmap == NULL
- || fastmap[re_string_byte_at (&input, match_first)]))
- || preg->can_be_null)
+ /* At first get the current byte from input string. */
+ int ch;
+ if (MB_CUR_MAX > 1 && (preg->syntax & RE_ICASE || preg->translate))
+ {
+ /* In this case, we can't determin easily the current byte,
+ since it might be a component byte of a multibyte character.
+ Then we use the constructed buffer instead. */
+ /* If MATCH_FIRST is out of the valid range, reconstruct the
+ buffers. */
+ if (input.raw_mbs_idx + input.valid_len <= match_first)
+ re_string_reconstruct (&input, match_first, eflags,
+ preg->newline_anchor);
+ /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+ Note that MATCH_FIRST must not be smaller than 0. */
+ ch = ((match_first >= length) ? 0
+ : re_string_byte_at (&input, match_first - input.raw_mbs_idx));
+ }
+ else
+ {
+ /* We apply translate/conversion manually, since it is trivial
+ in this case. */
+ /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+ Note that MATCH_FIRST must not be smaller than 0. */
+ ch = (match_first < length) ? (unsigned char)string[match_first] : 0;
+ /* Apply translation if we need. */
+ ch = preg->translate ? preg->translate[ch] : ch;
+ /* In case of case insensitive mode, convert to upper case. */
+ ch = ((preg->syntax & RE_ICASE) && islower (ch)) ? toupper (ch) : ch;
+ }
+
+ /* Eliminate inappropriate one by fastmap. */
+ if (preg->can_be_null || fastmap == NULL || fastmap[ch])
{
+ /* Reconstruct the buffers so that the matcher can assume that
+ the matching starts from the begining of the buffer. */
+ re_string_reconstruct (&input, match_first, eflags,
+ preg->newline_anchor);
#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX == 1 || re_string_first_byte (&input, match_first))
+ /* Eliminate it when it is a component of a multibyte character
+ and isn't the head of a multibyte character. */
+ if (MB_CUR_MAX == 1 || re_string_first_byte (&input, 0))
#endif
{
- /* We assume that the matching starts from `match_first'. */
- re_string_set_index (&input, match_first);
- mctx.match_first = mctx.state_log_top = match_first;
- mctx.nbkref_ents = mctx.max_bkref_len = 0;
- match_last = check_matching (preg, &input, &mctx, state_log,
- match_first, 0, fl_longest_match);
+ /* It seems to be appropriate one, then use the matcher. */
+ /* We assume that the matching starts from 0. */
+ mctx.state_log_top = mctx.nbkref_ents = mctx.max_bkref_len = 0;
+ match_last = check_matching (preg, &mctx, 0, fl_longest_match);
if (match_last != -1)
{
if (BE (match_last == -2, 0))
@@ -597,18 +622,9 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
}
}
/* Update counter. */
- if (range < 0)
- {
- --match_first;
- if (match_first < start + range)
- break;
- }
- else
- {
- ++match_first;
- if (match_first > start + range)
- break;
- }
+ match_first += incr;
+ if (match_first < left_lim || right_lim < match_first)
+ break;
}
/* Set pmatch[] if we need. */
@@ -621,30 +637,38 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
/* Set the points where matching start/end. */
- pmatch[0].rm_so = mctx.match_first;
+ pmatch[0].rm_so = 0;
mctx.match_last = pmatch[0].rm_eo = match_last;
if (!preg->no_sub && nmatch > 1)
{
/* We need the ranges of all the subexpressions. */
int halt_node;
- re_dfastate_t *pstate = state_log[match_last];
+ re_dfastate_t *pstate = mctx.state_log[match_last];
#ifdef DEBUG
- assert (state_log != NULL);
+ assert (mctx.state_log != NULL);
#endif
- halt_node = check_halt_state_context (preg, pstate, &input,
- match_last, eflags);
- err = sift_states_backward (preg, state_log, &mctx, &input, halt_node);
+ halt_node = check_halt_state_context (preg, pstate, &mctx,
+ match_last);
+ err = sift_states_backward (preg, &mctx, halt_node);
if (BE (err != REG_NOERROR, 0))
return err;
- err = set_regs (preg, state_log, &mctx, &input, nmatch, pmatch,
- halt_node);
+ err = set_regs (preg, &mctx, nmatch, pmatch, halt_node);
if (BE (err != REG_NOERROR, 0))
return err;
}
+
+ /* At last, add the offset to the each registers, since we slided
+ the buffers so that We can assume that the matching starts from 0. */
+ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+ if (pmatch[reg_idx].rm_so != -1)
+ {
+ pmatch[reg_idx].rm_so += match_first;
+ pmatch[reg_idx].rm_eo += match_first;
+ }
}
- re_free (state_log);
+ re_free (mctx.state_log);
if (dfa->nbackref)
match_ctx_free (&mctx);
re_string_destruct (&input);
@@ -656,11 +680,11 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
since initial states may have constraints like "\<", "^", etc.. */
static inline re_dfastate_t *
-acquire_init_state_context (err, preg, input, idx, eflags)
+acquire_init_state_context (err, preg, mctx, idx)
reg_errcode_t *err;
const regex_t *preg;
- const re_string_t *input;
- int idx, eflags;
+ const re_match_context_t *mctx;
+ int idx;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
@@ -668,7 +692,7 @@ acquire_init_state_context (err, preg, input, idx, eflags)
if (dfa->init_state->has_constraint)
{
unsigned int context;
- context = re_string_context_at (input, idx - 1, eflags,
+ context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
preg->newline_anchor);
if (IS_WORD_CONTEXT (context))
return dfa->init_state_word;
@@ -697,52 +721,51 @@ acquire_init_state_context (err, preg, input, idx, eflags)
and return the index where the matching end, return -1 if not match,
or return -2 in case of an error.
FL_SEARCH means we must search where the matching starts,
- FL_LONGEST_MATCH means we want the POSIX longest matching. */
+ FL_LONGEST_MATCH means we want the POSIX longest matching.
+ Note that the matcher assume that the maching starts from the current
+ index of the buffer. */
static int
-check_matching (preg, input, mctx, state_log, start_idx, fl_search,
- fl_longest_match)
+check_matching (preg, mctx, fl_search, fl_longest_match)
const regex_t *preg;
- re_string_t *input;
re_match_context_t *mctx;
- re_dfastate_t **state_log;
- int start_idx, fl_search, fl_longest_match;
+ int fl_search, fl_longest_match;
{
reg_errcode_t err;
- int match = 0, match_last = -1;
+ int match = 0;
+ int match_last = -1;
+ int cur_str_idx = re_string_cur_idx (mctx->input);
re_dfastate_t *cur_state;
- cur_state = acquire_init_state_context (&err, preg, input, start_idx,
- mctx->eflags);
+ cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
/* An initial state must not be NULL(invalid state). */
if (BE (cur_state == NULL, 0))
return -2;
- if (state_log != NULL)
- state_log[start_idx] = cur_state;
+ if (mctx->state_log != NULL)
+ mctx->state_log[cur_str_idx] = cur_state;
/* If the RE accepts NULL string. */
if (cur_state->halt)
{
if (!cur_state->has_constraint
- || check_halt_state_context (preg, cur_state, input, start_idx,
- mctx->eflags))
+ || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
{
if (!fl_longest_match)
- return start_idx;
+ return cur_str_idx;
else
{
- match_last = start_idx;
+ match_last = cur_str_idx;
match = 1;
}
}
}
- while (!re_string_eoi (input))
+ while (!re_string_eoi (mctx->input))
{
- cur_state = transit_state (&err, preg, cur_state, input,
- fl_search && !match, state_log, mctx);
+ cur_state = transit_state (&err, preg, mctx, cur_state,
+ fl_search && !match);
if (cur_state == NULL) /* Reached at the invalid state or an error. */
{
- int cur_str_idx = re_string_cur_idx (input);
+ cur_str_idx = re_string_cur_idx (mctx->input);
if (BE (err != REG_NOERROR, 0))
return -2;
if (fl_search && !match)
@@ -750,27 +773,27 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
/* Restart from initial state, since we are searching
the point from where matching start. */
#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX == 1 || re_string_first_byte (input, cur_str_idx))
+ if (MB_CUR_MAX == 1
+ || re_string_first_byte (mctx->input, cur_str_idx))
#endif /* RE_ENABLE_I18N */
- cur_state = acquire_init_state_context (&err, preg, input,
- cur_str_idx,
- mctx->eflags);
+ cur_state = acquire_init_state_context (&err, preg, mctx,
+ cur_str_idx);
if (BE (cur_state == NULL && err != REG_NOERROR, 0))
return -2;
- if (state_log != NULL)
- state_log[cur_str_idx] = cur_state;
+ if (mctx->state_log != NULL)
+ mctx->state_log[cur_str_idx] = cur_state;
}
else if (!fl_longest_match && match)
break;
else /* (fl_longest_match && match) || (!fl_search && !match) */
{
- if (state_log == NULL)
+ if (mctx->state_log == NULL)
break;
else
{
int max = mctx->state_log_top;
for (; cur_str_idx <= max; ++cur_str_idx)
- if (state_log[cur_str_idx] != NULL)
+ if (mctx->state_log[cur_str_idx] != NULL)
break;
if (cur_str_idx > max)
break;
@@ -783,12 +806,11 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
/* Reached at a halt state.
Check the halt state can satisfy the current context. */
if (!cur_state->has_constraint
- || check_halt_state_context (preg, cur_state, input,
- re_string_cur_idx (input),
- mctx->eflags))
+ || check_halt_state_context (preg, cur_state, mctx,
+ re_string_cur_idx (mctx->input)))
{
/* We found an appropriate halt state. */
- match_last = re_string_cur_idx (input);
+ match_last = re_string_cur_idx (mctx->input);
match = 1;
if (!fl_longest_match)
break;
@@ -823,11 +845,11 @@ static int check_halt_node_context (dfa, node, context)
match the context, return the node. */
static int
-check_halt_state_context (preg, state, input, idx, eflags)
+check_halt_state_context (preg, state, mctx, idx)
const regex_t *preg;
const re_dfastate_t *state;
- const re_string_t *input;
- int idx, eflags;
+ const re_match_context_t *mctx;
+ int idx;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i;
@@ -835,7 +857,8 @@ check_halt_state_context (preg, state, input, idx, eflags)
#ifdef DEBUG
assert (state->halt);
#endif
- context = re_string_context_at (input, idx, eflags, preg->newline_anchor);
+ context = re_string_context_at (mctx->input, idx, mctx->eflags,
+ preg->newline_anchor);
for (i = 0; i < state->nodes.nelem; ++i)
if (check_halt_node_context (dfa, state->nodes.elems[i], context))
return state->nodes.elems[i];
@@ -848,11 +871,9 @@ check_halt_state_context (preg, state, input, idx, eflags)
of errors. */
static int
-proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
+proceed_next_node (preg, mctx, pidx, node, eps_via_nodes)
const regex_t *preg;
- re_dfastate_t **state_log;
const re_match_context_t *mctx;
- const re_string_t *input;
int *pidx, node;
re_node_set *eps_via_nodes;
{
@@ -863,9 +884,9 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
err = re_node_set_insert (eps_via_nodes, node);
if (BE (err < 0, 0))
return -1;
- for (i = 0; i < state_log[*pidx]->nodes.nelem; ++i)
+ for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
{
- int candidate = state_log[*pidx]->nodes.elems[i];
+ int candidate = mctx->state_log[*pidx]->nodes.elems[i];
if (!re_node_set_contains (dfa->edests + node, candidate)
&& !(dfa->nodes[candidate].type == OP_CONTEXT_NODE
&& re_node_set_contains (dfa->edests + node,
@@ -892,7 +913,7 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
}
if (ACCEPT_MB_NODE (type))
- naccepted = check_node_accept_bytes (preg, entity, input, *pidx);
+ naccepted = check_node_accept_bytes (preg, entity, mctx->input, *pidx);
else if (type == OP_BACK_REF)
{
for (i = 0; i < mctx->nbkref_ents; ++i)
@@ -907,11 +928,12 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
if (BE (err < 0, 0))
return -1;
dest_node = dfa->nexts[node];
- if (re_node_set_contains (&state_log[*pidx]->nodes, dest_node))
+ if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+ dest_node))
return dest_node;
- for (i = 0; i < state_log[*pidx]->nodes.nelem; ++i)
+ for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
{
- dest_node = state_log[*pidx]->nodes.elems[i];
+ dest_node = mctx->state_log[*pidx]->nodes.elems[i];
if ((dfa->nodes[dest_node].type == OP_CONTEXT_NODE
&& (dfa->nexts[node]
== dfa->nodes[dest_node].opr.ctx_info->entity)))
@@ -921,13 +943,12 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
}
if (naccepted != 0
- || check_node_accept (preg, dfa->nodes + node, input, *pidx,
- mctx->eflags))
+ || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
{
dest_node = dfa->nexts[node];
*pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
#ifdef DEBUG
- assert (state_log[*pidx] != NULL);
+ assert (mctx->state_log[*pidx] != NULL);
#endif
re_node_set_empty (eps_via_nodes);
return dest_node;
@@ -946,11 +967,9 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
static reg_errcode_t
-set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
+set_regs (preg, mctx, nmatch, pmatch, last_node)
const regex_t *preg;
- re_dfastate_t **state_log;
const re_match_context_t *mctx;
- const re_string_t *input;
size_t nmatch;
regmatch_t *pmatch;
int last_node;
@@ -961,7 +980,7 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
int i;
#ifdef DEBUG
assert (nmatch > 1);
- assert (state_log != NULL);
+ assert (mctx->state_log != NULL);
#endif
cur_node = dfa->init_node;
real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
@@ -1002,8 +1021,7 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
break;
/* Proceed to next node. */
- cur_node = proceed_next_node (preg, state_log, mctx, input, &idx,
- cur_node, &eps_via_nodes);
+ cur_node = proceed_next_node (preg, mctx, &idx, cur_node, &eps_via_nodes);
if (BE (cur_node < 0, 0))
return REG_ESPACE;
}
@@ -1013,15 +1031,15 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
#define NUMBER_OF_STATE 1
-/* This function checks the STATE_LOG from the MCTX->match_last
- to MCTX->match_first and sift the nodes in each states according to
- the following rules. Updated state_log will be wrote to STATE_LOG.
+/* This function checks the STATE_LOG from the MCTX->match_last to 0
+ and sift the nodes in each states according to the following rules.
+ Updated state_log will be wrote to STATE_LOG.
Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1. When STR_IDX == MATCH_LAST(the last index in the state_log):
If `a' isn't the LAST_NODE and `a' can't epsilon transit to
the LAST_NODE, we throw away the node `a'.
- 2. When MATCH_FIRST <= STR_IDX < MATCH_LAST and `a' accepts
+ 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
string `s' and transit to `b':
i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
away the node `a'.
@@ -1037,11 +1055,9 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
((state) != NULL && re_node_set_contains (&(state)->nodes, node))
static reg_errcode_t
-sift_states_backward (preg, state_log, mctx, input, last_node)
+sift_states_backward (preg, mctx, last_node)
const regex_t *preg;
- re_dfastate_t **state_log;
const re_match_context_t *mctx;
- const re_string_t *input;
int last_node;
{
reg_errcode_t err;
@@ -1051,12 +1067,12 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
re_node_set *plog; /* Points the state_log[str_idx]->nodes */
#ifdef DEBUG
- assert (state_log != NULL && state_log[str_idx] != NULL);
+ assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
#endif
err = re_node_set_alloc (&state_buf, NUMBER_OF_STATE);
if (BE (err != REG_NOERROR, 0))
return err;
- plog = &state_log[str_idx]->nodes;
+ plog = &mctx->state_log[str_idx]->nodes;
/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
transit to the last_node and the last_node itself. */
@@ -1064,7 +1080,8 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
if (BE (err != REG_NOERROR, 0))
return err;
- if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref)
+ if (mctx->state_log[str_idx] != NULL
+ && mctx->state_log[str_idx]->has_backref)
{
err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
if (BE (err != REG_NOERROR, 0))
@@ -1072,19 +1089,19 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
}
/* Update state log. */
- state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
- if (BE (state_log[str_idx] == NULL && err != REG_NOERROR, 0))
+ mctx->state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
+ if (BE (mctx->state_log[str_idx] == NULL && err != REG_NOERROR, 0))
return err;
/* Then check each states in the state_log. */
- while (str_idx > mctx->match_first)
+ while (str_idx > 0)
{
int i, j;
/* Update counters. */
re_node_set_empty (&state_buf);
--str_idx;
- plog = ((state_log[str_idx] == NULL) ? &empty_set
- : &state_log[str_idx]->nodes);
+ plog = ((mctx->state_log[str_idx] == NULL) ? &empty_set
+ : &mctx->state_log[str_idx]->nodes);
/* Then build the next sifted state.
We build the next sifted state on `state_buf', and update
@@ -1106,27 +1123,25 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
/* If the node may accept `multi byte'. */
if (ACCEPT_MB_NODE (type))
- naccepted = sift_states_iter_mb (preg, state_log, mctx, input,
- entity, str_idx,
+ naccepted = sift_states_iter_mb (preg, mctx, entity, str_idx,
mctx->match_last);
/* If the node is a back reference. */
else if (type == OP_BACK_REF)
for (j = 0; j < mctx->nbkref_ents; ++j)
{
- naccepted = sift_states_iter_bkref (dfa, state_log,
+ naccepted = sift_states_iter_bkref (dfa, mctx->state_log,
mctx->bkref_ents + j,
prev_node, str_idx,
- mctx->match_first,
mctx->match_last);
if (naccepted)
break;
}
if (!naccepted
- && check_node_accept (preg, dfa->nodes + prev_node, input,
- str_idx, mctx->eflags)
- && STATE_NODE_CONTAINS (state_log[str_idx + 1],
+ && check_node_accept (preg, dfa->nodes + prev_node, mctx,
+ str_idx)
+ && STATE_NODE_CONTAINS (mctx->state_log[str_idx + 1],
dfa->nexts[prev_node]))
naccepted = 1;
@@ -1140,7 +1155,8 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
if (BE (err != REG_NOERROR, 0))
return err;
}
- if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref)
+ if (mctx->state_log[str_idx] != NULL
+ && mctx->state_log[str_idx]->has_backref)
{
err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
if (BE (err != REG_NOERROR, 0))
@@ -1148,8 +1164,8 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
}
/* Update state_log. */
- state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
- if (BE (state_log[str_idx] == NULL && err != REG_NOERROR, 0))
+ mctx->state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
+ if (BE (mctx->state_log[str_idx] == NULL && err != REG_NOERROR, 0))
return err;
}
@@ -1159,36 +1175,44 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
/* Helper functions. */
-static inline void
-clean_state_log_if_need (state_log, mctx, next_state_log_idx)
- re_dfastate_t **state_log;
+static inline reg_errcode_t
+clean_state_log_if_need (mctx, next_state_log_idx)
re_match_context_t *mctx;
int next_state_log_idx;
{
int top = mctx->state_log_top;
+
+ if (next_state_log_idx >= mctx->input->bufs_len
+ || (next_state_log_idx >= mctx->input->valid_len
+ && mctx->input->valid_len < mctx->input->len))
+ {
+ reg_errcode_t err;
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
if (top < next_state_log_idx)
{
- memset (state_log + top + 1, '\0',
+ memset (mctx->state_log + top + 1, '\0',
sizeof (re_dfastate_t *) * (next_state_log_idx - top));
mctx->state_log_top = next_state_log_idx;
}
+ return REG_NOERROR;
}
static int
-sift_states_iter_mb (preg, state_log, mctx, input, node_idx, str_idx,
- max_str_idx)
+sift_states_iter_mb (preg, mctx, node_idx, str_idx, max_str_idx)
const regex_t *preg;
- re_dfastate_t **state_log;
const re_match_context_t *mctx;
- const re_string_t *input;
int node_idx, str_idx, max_str_idx;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int naccepted;
/* Check the node can accept `multi byte'. */
- naccepted = check_node_accept_bytes (preg, node_idx, input, str_idx);
+ naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
- !STATE_NODE_CONTAINS (state_log[str_idx + naccepted],
+ !STATE_NODE_CONTAINS (mctx->state_log[str_idx + naccepted],
dfa->nexts[node_idx]))
/* The node can't accept the `multi byte', or the
destination was already throwed away, then the node
@@ -1200,12 +1224,11 @@ sift_states_iter_mb (preg, state_log, mctx, input, node_idx, str_idx,
}
static int
-sift_states_iter_bkref (dfa, state_log, mctx_entry, node_idx, idx, match_first,
- match_last)
+sift_states_iter_bkref (dfa, state_log, mctx_entry, node_idx, idx, match_last)
const re_dfa_t *dfa;
re_dfastate_t **state_log;
struct re_backref_cache_entry *mctx_entry;
- int node_idx, idx, match_first, match_last;
+ int node_idx, idx, match_last;
{
int naccepted = 0;
int from_idx, to_idx;
@@ -1244,7 +1267,7 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
if (entry->from == entry->to && entry->from == idx)
break;
}
- if (j < mctx->nbkref_ents || idx == mctx->match_first)
+ if (j < mctx->nbkref_ents || idx == 0)
{
reg_errcode_t err;
err = re_node_set_add_intersect (state_buf, plog,
@@ -1266,30 +1289,38 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
update the destination of STATE_LOG. */
static re_dfastate_t *
-transit_state (err, preg, state, input, fl_search, state_log, mctx)
+transit_state (err, preg, mctx, state, fl_search)
reg_errcode_t *err;
const regex_t *preg;
- re_dfastate_t *state, **state_log;
- re_string_t *input;
- int fl_search;
re_match_context_t *mctx;
+ re_dfastate_t *state;
+ int fl_search;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfastate_t **trtable, *next_state;
unsigned char ch;
+ if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
+ || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
+ && mctx->input->valid_len < mctx->input->len))
+ {
+ *err = extend_buffers (mctx);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ }
+
*err = REG_NOERROR;
if (state == NULL)
{
next_state = state;
- re_string_skip_bytes (input, 1);
+ re_string_skip_bytes (mctx->input, 1);
}
else
{
/* If the current state can accept multibyte. */
if (state->accept_mb)
{
- *err = transit_state_mb (preg, state, input, state_log, mctx);
+ *err = transit_state_mb (preg, state, mctx);
if (BE (*err != REG_NOERROR, 0))
return NULL;
}
@@ -1298,7 +1329,7 @@ transit_state (err, preg, state, input, fl_search, state_log, mctx)
if (1)
{
/* Use transition table */
- ch = re_string_fetch_byte (input);
+ ch = re_string_fetch_byte (mctx->input);
trtable = fl_search ? state->trtable_search : state->trtable;
if (trtable == NULL)
{
@@ -1313,25 +1344,24 @@ transit_state (err, preg, state, input, fl_search, state_log, mctx)
else
{
/* don't use transition table */
- next_state = transit_state_sb (err, preg, state, input, fl_search,
- mctx);
+ next_state = transit_state_sb (err, preg, state, fl_search, mctx);
if (BE (next_state == NULL && err != REG_NOERROR, 0))
return NULL;
}
}
/* Update the state_log if we need. */
- if (state_log != NULL)
+ if (mctx->state_log != NULL)
{
- int cur_idx = re_string_cur_idx (input);
+ int cur_idx = re_string_cur_idx (mctx->input);
if (cur_idx > mctx->state_log_top)
{
- state_log[cur_idx] = next_state;
+ mctx->state_log[cur_idx] = next_state;
mctx->state_log_top = cur_idx;
}
- else if (state_log[cur_idx] == 0)
+ else if (mctx->state_log[cur_idx] == 0)
{
- state_log[cur_idx] = next_state;
+ mctx->state_log[cur_idx] = next_state;
}
else
{
@@ -1342,7 +1372,7 @@ transit_state (err, preg, state, input, fl_search, state_log, mctx)
the destination of a multibyte char/collating element/
back reference. Then the next state is the union set of
these destinations and the results of the transition table. */
- pstate = state_log[cur_idx];
+ pstate = mctx->state_log[cur_idx];
log_nodes = pstate->entrance_nodes;
if (next_state != NULL)
{
@@ -1357,9 +1387,10 @@ transit_state (err, preg, state, input, fl_search, state_log, mctx)
/* Note: We already add the nodes of the initial state,
then we don't need to add them here. */
- context = re_string_context_at (input, re_string_cur_idx (input) - 1,
+ context = re_string_context_at (mctx->input,
+ re_string_cur_idx (mctx->input) - 1,
mctx->eflags, preg->newline_anchor);
- next_state = state_log[cur_idx]
+ next_state = mctx->state_log[cur_idx]
= re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
this function is next_state and ERR is already set. */
@@ -1370,10 +1401,10 @@ transit_state (err, preg, state, input, fl_search, state_log, mctx)
/* If the next state has back references. */
if (next_state != NULL && next_state->has_backref)
{
- *err = transit_state_bkref (preg, next_state, input, state_log, mctx);
+ *err = transit_state_bkref (preg, next_state, mctx);
if (BE (*err != REG_NOERROR, 0))
return NULL;
- next_state = state_log[cur_idx];
+ next_state = mctx->state_log[cur_idx];
}
}
return next_state;
@@ -1385,18 +1416,17 @@ transit_state (err, preg, state, input, fl_search, state_log, mctx)
accepting the current input byte. */
static re_dfastate_t *
-transit_state_sb (err, preg, state, input, fl_search, mctx)
+transit_state_sb (err, preg, state, fl_search, mctx)
reg_errcode_t *err;
const regex_t *preg;
re_dfastate_t *state;
- re_string_t *input;
int fl_search;
re_match_context_t *mctx;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_node_set next_nodes;
re_dfastate_t *next_state;
- int node_cnt, cur_str_idx = re_string_cur_idx (input);
+ int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
unsigned int context;
*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
@@ -1405,8 +1435,7 @@ transit_state_sb (err, preg, state, input, fl_search, mctx)
for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
{
int cur_node = state->nodes.elems[node_cnt];
- if (check_node_accept (preg, dfa->nodes + cur_node, input,
- cur_str_idx, mctx->eflags))
+ if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
{
*err = re_node_set_merge (&next_nodes,
dfa->eclosures + dfa->nexts[cur_node]);
@@ -1434,22 +1463,21 @@ transit_state_sb (err, preg, state, input, fl_search, mctx)
return NULL;
}
}
- context = re_string_context_at (input, cur_str_idx, mctx->eflags,
+ context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
preg->newline_anchor);
next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
this function is next_state and ERR is already set. */
re_node_set_free (&next_nodes);
- re_string_skip_bytes (input, 1);
+ re_string_skip_bytes (mctx->input, 1);
return next_state;
}
static reg_errcode_t
-transit_state_mb (preg, pstate, input, state_log, mctx)
+transit_state_mb (preg, pstate, mctx)
const regex_t *preg;
- re_dfastate_t *pstate, **state_log;
- const re_string_t *input;
+ re_dfastate_t *pstate;
re_match_context_t *mctx;
{
reg_errcode_t err;
@@ -1466,7 +1494,8 @@ transit_state_mb (preg, pstate, input, state_log, mctx)
if (dfa->nodes[cur_node_idx].type == OP_CONTEXT_NODE)
{
- context = re_string_context_at (input, re_string_cur_idx (input),
+ context = re_string_context_at (mctx->input,
+ re_string_cur_idx (mctx->input),
mctx->eflags, preg->newline_anchor);
if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
context))
@@ -1476,14 +1505,16 @@ transit_state_mb (preg, pstate, input, state_log, mctx)
/* How many bytes the node can accepts? */
if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
- naccepted = check_node_accept_bytes (preg, cur_node_idx, input,
- re_string_cur_idx (input));
+ naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
+ re_string_cur_idx (mctx->input));
if (naccepted == 0)
continue;
/* The node can accepts `naccepted' bytes. */
- dest_idx = re_string_cur_idx (input) + naccepted;
- clean_state_log_if_need (state_log, mctx, dest_idx);
+ dest_idx = re_string_cur_idx (mctx->input) + naccepted;
+ err = clean_state_log_if_need (mctx, dest_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
#ifdef DEBUG
assert (dfa->nexts[cur_node_idx] != -1);
#endif
@@ -1491,7 +1522,7 @@ transit_state_mb (preg, pstate, input, state_log, mctx)
then we use pstate->nodes.elems[i] instead. */
new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
- dest_state = state_log[dest_idx];
+ dest_state = mctx->state_log[dest_idx];
if (dest_state == NULL)
dest_nodes = *new_nodes;
else
@@ -1501,11 +1532,11 @@ transit_state_mb (preg, pstate, input, state_log, mctx)
if (BE (err != REG_NOERROR, 0))
return err;
}
- context = re_string_context_at (input, dest_idx - 1, mctx->eflags,
+ context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
preg->newline_anchor);
- state_log[dest_idx] = re_acquire_state_context (&err, dfa, &dest_nodes,
- context);
- if (BE (state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+ mctx->state_log[dest_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
return err;
if (dest_state != NULL)
re_node_set_free (&dest_nodes);
@@ -1514,24 +1545,20 @@ transit_state_mb (preg, pstate, input, state_log, mctx)
}
static reg_errcode_t
-transit_state_bkref (preg, pstate, input, state_log, mctx)
+transit_state_bkref (preg, pstate, mctx)
const regex_t *preg;
- re_dfastate_t *pstate, **state_log;
- const re_string_t *input;
+ re_dfastate_t *pstate;
re_match_context_t *mctx;
{
reg_errcode_t err;
re_dfastate_t **work_state_log;
-#ifdef DEBUG
- assert (mctx->match_first != -1);
-#endif
- work_state_log = re_malloc (re_dfastate_t *, re_string_cur_idx (input) + 1);
+ work_state_log = re_malloc (re_dfastate_t *,
+ re_string_cur_idx (mctx->input) + 1);
if (BE (work_state_log == NULL, 0))
return REG_ESPACE;
- err = transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log,
- state_log, mctx);
+ err = transit_state_bkref_loop (preg, &pstate->nodes, work_state_log, mctx);
re_free (work_state_log);
return err;
}
@@ -1539,23 +1566,24 @@ transit_state_bkref (preg, pstate, input, state_log, mctx)
/* Caller must allocate `work_state_log'. */
static reg_errcode_t
-transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
+transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
const regex_t *preg;
- const re_string_t *input;
re_node_set *nodes;
- re_dfastate_t **work_state_log, **state_log;
+ re_dfastate_t **work_state_log;
re_match_context_t *mctx;
{
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j;
+ re_dfastate_t **state_log_bak;
regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1);
- int cur_str_idx = re_string_cur_idx (input);
+ int cur_str_idx = re_string_cur_idx (mctx->input);
if (BE (cur_regs == NULL, 0))
return REG_ESPACE;
for (i = 0; i < nodes->nelem; ++i)
{
+ unsigned char *buf;
int dest_str_idx, subexp_idx, prev_nelem, subexp_len;
int node_idx = nodes->elems[i];
unsigned int context;
@@ -1569,8 +1597,8 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
else if (node->type == OP_CONTEXT_NODE &&
dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF)
{
- context = re_string_context_at (input, cur_str_idx, mctx->eflags,
- preg->newline_anchor);
+ context = re_string_context_at (mctx->input, cur_str_idx,
+ mctx->eflags, preg->newline_anchor);
if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
continue;
subexp_idx = dfa->nodes[node->opr.ctx_info->entity].opr.idx;
@@ -1580,29 +1608,37 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
/* `node' is a backreference.
At first, set registers to check the backreference. */
- cur_regs[0].rm_so = mctx->match_first;
+ cur_regs[0].rm_so = 0;
cur_regs[0].rm_eo = cur_str_idx;
- memcpy (work_state_log + mctx->match_first,
- state_log + mctx->match_first,
- sizeof (re_dfastate_t *)
- * (cur_str_idx - mctx->match_first + 1));
+ memcpy (work_state_log, mctx->state_log,
+ sizeof (re_dfastate_t *) * (cur_str_idx + 1));
mctx->match_last = cur_str_idx;
- sift_states_backward (preg, work_state_log, mctx, input, node_idx);
- if (!STATE_NODE_CONTAINS (work_state_log[mctx->match_first],
- dfa->init_node))
+ state_log_bak = mctx->state_log;
+ mctx->state_log = work_state_log;
+ sift_states_backward (preg, mctx, node_idx);
+ if (!STATE_NODE_CONTAINS (work_state_log[0], dfa->init_node))
continue;
for (j = 1; j <= preg->re_nsub; ++j)
cur_regs[j].rm_so = cur_regs[j].rm_eo = -1;
- set_regs (preg, work_state_log, mctx, input,
- subexp_idx + 1, cur_regs, node_idx);
+ set_regs (preg, mctx, subexp_idx + 1, cur_regs, node_idx);
+ mctx->state_log = state_log_bak;
/* Then check that the backreference can match the input string. */
subexp_len = cur_regs[subexp_idx].rm_eo - cur_regs[subexp_idx].rm_so;
- if (subexp_len < 0
- || (strncmp ((re_string_get_buffer (input)
- + cur_regs[subexp_idx].rm_so),
- re_string_get_buffer (input) + cur_str_idx, subexp_len)
- != 0))
+ if (subexp_len < 0 || cur_str_idx + subexp_len > mctx->input->len)
+ continue;
+
+ if (cur_str_idx + subexp_len > mctx->input->valid_len
+ && mctx->input->valid_len < mctx->input->len)
+ {
+ reg_errcode_t err;
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ buf = re_string_get_buffer (mctx->input);
+ if (strncmp (buf + cur_regs[subexp_idx].rm_so, buf + cur_str_idx,
+ subexp_len) != 0)
continue;
/* Successfully matched, add a new cache entry. */
@@ -1610,7 +1646,9 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
err = match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx);
if (BE (err != REG_NOERROR, 0))
return err;
- clean_state_log_if_need (state_log, mctx, dest_str_idx);
+ err = clean_state_log_if_need (mctx, dest_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
/* And add the epsilon closures (which is `new_dest_nodes') of
the backreference to appropriate state_log. */
@@ -1621,19 +1659,20 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
new_dest_nodes = dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure;
else
new_dest_nodes = dfa->eclosures + dfa->nexts[node_idx];
- context = (IS_WORD_CHAR (re_string_byte_at (input, dest_str_idx - 1))
+ context = (IS_WORD_CHAR (re_string_byte_at (mctx->input,
+ dest_str_idx - 1))
? CONTEXT_WORD : 0);
- dest_state = state_log[dest_str_idx];
+ dest_state = mctx->state_log[dest_str_idx];
- prev_nelem = ((state_log[cur_str_idx] == NULL) ? 0
- : state_log[cur_str_idx]->nodes.nelem);
+ prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+ : mctx->state_log[cur_str_idx]->nodes.nelem);
/* Add `new_dest_node' to state_log. */
if (dest_state == NULL)
{
- state_log[dest_str_idx] = re_acquire_state_context (&err, dfa,
- new_dest_nodes,
- context);
- if (BE (state_log[dest_str_idx] == NULL && err != REG_NOERROR, 0))
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, new_dest_nodes, context);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
return err;
}
else
@@ -1643,20 +1682,21 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
new_dest_nodes);
if (BE (err != REG_NOERROR, 0))
return err;
- state_log[dest_str_idx] = re_acquire_state_context (&err, dfa,
- &dest_nodes,
- context);
- if (BE (state_log[dest_str_idx] == NULL && err != REG_NOERROR, 0))
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
return err;
re_node_set_free (&dest_nodes);
}
/* We need to check recursively if the backreference can epsilon
transit. */
- if (subexp_len == 0 && state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+ if (subexp_len == 0
+ && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
{
- err = transit_state_bkref_loop (preg, input, new_dest_nodes,
- work_state_log, state_log, mctx);
+ err = transit_state_bkref_loop (preg, new_dest_nodes, work_state_log,
+ mctx);
if (BE (err != REG_NOERROR, 0))
return err;
}
@@ -2170,11 +2210,11 @@ find_collation_sequence_value (mbs, mbs_len)
byte of the INPUT. */
static int
-check_node_accept (preg, node, input, idx, eflags)
+check_node_accept (preg, node, mctx, idx)
const regex_t *preg;
const re_token_t *node;
- const re_string_t *input;
- int idx, eflags;
+ const re_match_context_t *mctx;
+ int idx;
{
const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
const re_token_t *cur_node;
@@ -2183,7 +2223,8 @@ check_node_accept (preg, node, input, idx, eflags)
{
/* The node has constraints. Check whether the current context
satisfies the constraints. */
- unsigned int context = re_string_context_at (input, idx, eflags,
+ unsigned int context = re_string_context_at (mctx->input, idx,
+ mctx->eflags,
preg->newline_anchor);
if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
return 0;
@@ -2192,7 +2233,7 @@ check_node_accept (preg, node, input, idx, eflags)
else
cur_node = node;
- ch = re_string_byte_at (input, idx);
+ ch = re_string_byte_at (mctx->input, idx);
if (cur_node->type == CHARACTER)
return cur_node->opr.c == ch;
else if (cur_node->type == SIMPLE_BRACKET)
@@ -2203,17 +2244,69 @@ check_node_accept (preg, node, input, idx, eflags)
else
return 0;
}
+
+/* Extend the buffers, if the buffers have run out. */
+
+static reg_errcode_t
+extend_buffers (mctx)
+ re_match_context_t *mctx;
+{
+ reg_errcode_t ret;
+ re_string_t *pstr = mctx->input;
+
+ /* Double the lengthes of the buffers. */
+ ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+
+ if (mctx->state_log != NULL)
+ {
+ /* And double the length of state_log. */
+ mctx->state_log = re_realloc (mctx->state_log, re_dfastate_t *,
+ pstr->bufs_len * 2);
+ if (BE (mctx->state_log == NULL, 0))
+ return REG_ESPACE;
+ }
+
+ /* Then reconstruct the buffers. */
+ if (pstr->icase)
+ {
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ build_wcs_upper_buffer (pstr);
+ else
+#endif /* RE_ENABLE_I18N */
+ build_upper_buffer (pstr);
+ }
+ else
+ {
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ build_wcs_buffer (pstr);
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ if (pstr->trans != NULL)
+ re_string_translate_buffer (pstr);
+ else
+ pstr->valid_len = pstr->bufs_len;
+ }
+ }
+ return REG_NOERROR;
+}
+
/* Functions for matching context. */
static reg_errcode_t
-match_ctx_init (mctx, eflags, n)
+match_ctx_init (mctx, eflags, input, n)
re_match_context_t *mctx;
- int eflags;
- int n;
+ int eflags, n;
+ re_string_t *input;
{
mctx->eflags = eflags;
- mctx->match_first = mctx->match_last = -1;
+ mctx->input = input;
+ mctx->match_last = -1;
if (n > 0)
{
mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);