about | summary | refs | log | tree | commit | diff
path: root/posix
diff options
context:
space:
mode:
Diffstat (limited to 'posix')
-rw-r--r-- posix/regex_internal.c 42
-rw-r--r-- posix/regex_internal.h 15
-rw-r--r-- posix/regexec.c 69
3 files changed, 83 insertions, 43 deletions
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index c4400a8c23..116543a6da 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -24,8 +24,13 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <wchar.h>
-#include <wctype.h>
+
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
#ifdef _LIBC
# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
@@ -99,7 +104,8 @@ re_string_allocate (pstr, str, len, init_len, trans, icase)
if (BE (ret != REG_NOERROR, 0))
return ret;
- pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case : (char *) str);
+ pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
+ : (unsigned char *) str);
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
|| MB_CUR_MAX > 1) ? pstr->valid_len : len;
@@ -127,7 +133,8 @@ re_string_construct (pstr, str, len, trans, icase)
if (BE (ret != REG_NOERROR, 0))
return ret;
}
- pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case : (char *) str);
+ pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
+ : (unsigned char *) str);
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
if (icase)
@@ -176,13 +183,13 @@ re_string_realloc_buffers (pstr, new_buf_len)
#endif /* RE_ENABLE_I18N */
if (MBS_ALLOCATED (pstr))
{
- pstr->mbs = re_realloc (pstr->mbs, char, new_buf_len);
+ pstr->mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len);
if (BE (pstr->mbs == NULL, 0))
return REG_ESPACE;
}
if (MBS_CASE_ALLOCATED (pstr))
{
- pstr->mbs_case = re_realloc (pstr->mbs_case, char, new_buf_len);
+ pstr->mbs_case = re_realloc (pstr->mbs_case, unsigned char, new_buf_len);
if (BE (pstr->mbs_case == NULL, 0))
return REG_ESPACE;
if (!MBS_ALLOCATED (pstr))
@@ -202,7 +209,7 @@ re_string_construct_common (str, len, pstr, trans, icase)
int icase;
{
memset (pstr, '\0', sizeof (re_string_t));
- pstr->raw_mbs = str;
+ pstr->raw_mbs = (const unsigned char *) str;
pstr->len = len;
pstr->trans = trans;
pstr->icase = icase ? 1 : 0;
@@ -235,8 +242,8 @@ build_wcs_buffer (pstr)
wchar_t wc;
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
- mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
- remain_len, &pstr->cur_state);
+ mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ + byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2, 0))
{
/* The buffer doesn't have enough space, finish to build. */
@@ -254,9 +261,8 @@ build_wcs_buffer (pstr)
/* Apply the translation if we need. */
if (pstr->trans != NULL && mbclen == 1)
{
- int ch = *((unsigned char *) pstr->raw_mbs + pstr->raw_mbs_idx
- + byte_idx);
- pstr->mbs_case[byte_idx] = pstr->trans[ch];
+ int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
+ pstr->mbs_case[byte_idx] = ch;
}
/* Write wide character and padding. */
pstr->wcs[byte_idx++] = wc;
@@ -284,8 +290,8 @@ build_wcs_upper_buffer (pstr)
wchar_t wc;
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
- mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
- remain_len, &pstr->cur_state);
+ mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ + byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2, 0))
{
/* The buffer doesn't have enough space, finish to build. */
@@ -310,7 +316,7 @@ build_wcs_upper_buffer (pstr)
else /* mbclen > 1 */
{
if (iswlower (wc))
- wcrtomb (pstr->mbs + byte_idx, towupper (wc), &prev_st);
+ wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
else
memcpy (pstr->mbs + byte_idx,
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
@@ -340,7 +346,7 @@ re_string_skip_chars (pstr, new_raw_idx)
{
int remain_len = pstr->len - rawbuf_idx;
prev_st = pstr->cur_state;
- mbclen = mbrlen (pstr->raw_mbs + rawbuf_idx, remain_len,
+ mbclen = mbrlen ((const char *) pstr->raw_mbs + rawbuf_idx, remain_len,
&pstr->cur_state);
if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
{
@@ -420,9 +426,9 @@ re_string_reconstruct (pstr, idx, eflags, newline)
pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
: CONTEXT_NEWLINE | CONTEXT_BEGBUF);
if (!MBS_CASE_ALLOCATED (pstr))
- pstr->mbs_case = (char *) pstr->raw_mbs;
+ pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
- pstr->mbs = (char *) pstr->raw_mbs;
+ pstr->mbs = (unsigned char *) pstr->raw_mbs;
offset = idx;
}
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 574bf652b8..9f1f9826f2 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -228,15 +228,15 @@ struct re_string_t
{
/* Indicate the raw buffer which is the original string passed as an
argument of regexec(), re_search(), etc.. */
- const char *raw_mbs;
+ const unsigned char *raw_mbs;
/* Store the multibyte string. In case of "case insensitive mode" like
REG_ICASE, upper cases of the string are stored, otherwise MBS points
the same address that RAW_MBS points. */
- char *mbs;
+ unsigned char *mbs;
/* Store the case sensitive multibyte string. In case of
"case insensitive mode", the original string are stored,
otherwise MBS_CASE points the same address that MBS points. */
- char *mbs_case;
+ unsigned char *mbs_case;
#ifdef RE_ENABLE_I18N
/* Store the wide character string which is corresponding to MBS. */
wint_t *wcs;
@@ -512,7 +512,7 @@ typedef struct
union
{
unsigned char ch;
- char *name;
+ unsigned char *name;
wchar_t wch;
} opr;
} bracket_elem_t;
@@ -580,7 +580,7 @@ re_string_elem_size_at (pstr, idx)
int idx;
{
#ifdef _LIBC
- const char *extra, *p;
+ const unsigned char *p, *extra;
const int32_t *table, *indirect;
int32_t tmp;
# include <locale/weight.h>
@@ -589,11 +589,12 @@ re_string_elem_size_at (pstr, idx)
if (nrules != 0)
{
table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
- extra = (const char *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_INDIRECTMB);
p = pstr->mbs + idx;
- tmp = findidx ((const unsigned char **) &p);
+ tmp = findidx (&p);
return p - pstr->mbs - idx;
}
else
diff --git a/posix/regexec.c b/posix/regexec.c
index 4a9c64a191..142127883d 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -23,8 +23,13 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <wchar.h>
-#include <wctype.h>
+
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
#ifdef _LIBC
# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
@@ -123,7 +128,7 @@ static re_dfastate_t **build_trtable (const regex_t *dfa,
static int check_node_accept_bytes (const regex_t *preg, int node_idx,
const re_string_t *input, int idx);
# ifdef _LIBC
-static unsigned int find_collation_sequence_value (const char *mbs,
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
size_t name_len);
# endif /* _LIBC */
#endif /* RE_ENABLE_I18N */
@@ -1674,7 +1679,7 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
if (BE (err != REG_NOERROR, 0))
return err;
}
- buf = re_string_get_buffer (mctx->input);
+ buf = (char *) re_string_get_buffer (mctx->input);
if (strncmp (buf + cur_regs[subexp_idx].rm_so, buf + cur_str_idx,
subexp_len) != 0)
continue;
@@ -1855,27 +1860,51 @@ build_trtable (preg, state, fl_search)
}
/* Update the transition table. */
+ /* For all characters ch...: */
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch)
if ((acceptable[i] >> j) & 1)
{
+ /* The current state accepts the character ch. */
if (IS_WORD_CHAR (ch))
{
for (k = 0; k < ndests; ++k)
if ((dests_ch[k][i] >> j) & 1)
- trtable[ch] = dest_states_word[k];
+ {
+ /* k-th destination accepts the word character ch. */
+ trtable[ch] = dest_states_word[k];
+ /* There must be only one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ break;
+ }
}
else /* not WORD_CHAR */
{
for (k = 0; k < ndests; ++k)
if ((dests_ch[k][i] >> j) & 1)
- trtable[ch] = dest_states[k];
+ {
+ /* k-th destination accepts the non-word character ch. */
+ trtable[ch] = dest_states[k];
+ /* There must be only one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ break;
+ }
}
}
/* new line */
- for (k = 0; k < ndests; ++k)
- if (bitset_contain (acceptable, NEWLINE_CHAR))
- trtable[NEWLINE_CHAR] = dest_states_nl[k];
+ if (bitset_contain (acceptable, NEWLINE_CHAR))
+ {
+ /* The current state accepts newline character. */
+ for (k = 0; k < ndests; ++k)
+ if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
+ {
+ /* k-th destination accepts newline character. */
+ trtable[NEWLINE_CHAR] = dest_states_nl[k];
+ /* There must be only one destination which accepts
+ newline. See group_nodes_into_DFAstates. */
+ break;
+ }
+ }
re_free (dest_states_nl);
re_free (dest_states_word);
@@ -2069,7 +2098,7 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
{
const re_charset_t *cset = node->opr.mbcset;
# ifdef _LIBC
- const char *pin = re_string_get_buffer (input) + str_idx;
+ const unsigned char *pin = re_string_get_buffer (input) + str_idx;
# endif /* _LIBC */
int match_len = 0;
wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
@@ -2098,17 +2127,19 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
{
unsigned int in_collseq = 0;
const int32_t *table, *indirect;
- const char *weights, *extra, *collseqwc;
+ const unsigned char *weights, *extra;
+ const char *collseqwc;
int32_t idx;
/* This #include defines a local function! */
# include <locale/weight.h>
/* match with collating_symbol? */
if (cset->ncoll_syms)
- extra = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
for (i = 0; i < cset->ncoll_syms; ++i)
{
- const char *coll_sym = extra + cset->coll_syms[i];
+ const unsigned char *coll_sym = extra + cset->coll_syms[i];
/* Compare the length of input collating element and
the length of current collating element. */
if (*coll_sym != elem_len)
@@ -2147,11 +2178,13 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
/* match with equivalence_class? */
if (cset->nequiv_classes)
{
- const unsigned char *cp = (const unsigned char *) pin;
+ const unsigned char *cp = pin;
table = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
- weights = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
- extra = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ weights = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
idx = findidx (&cp);
@@ -2215,7 +2248,7 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
# ifdef _LIBC
static unsigned int
find_collation_sequence_value (mbs, mbs_len)
- const char *mbs;
+ const unsigned char *mbs;
size_t mbs_len;
{
uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -2226,7 +2259,7 @@ find_collation_sequence_value (mbs, mbs_len)
/* No valid character. Match it as a single byte character. */
const unsigned char *collseq = (const unsigned char *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
- return collseq[*(unsigned char *) mbs];
+ return collseq[mbs[0]];
}
return UINT_MAX;
}