From 434d3784f194e382d86edd72c9c6a1d0051a7b96 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 26 Apr 2002 20:52:02 +0000 Subject: Update. 2002-04-26 Isamu Hasegawa * posix/regcomp.c (re_compile_fastmap_iter): Fix fastmap in case of not _LIBC and RE_ENABLE_I18N. (build_range_exp): Implement for not _LIBC. (build_collating_symbol): Likewise. (parse_bracket_exp): Unify redundant error handlings. Don't erase mbcset for non matching list in multibyte envs. (build_word_op): Add '_' to matching list for \w operator. * posix/regex_internal.c (re_string_construct): Invoke build_upper_buffer in case of not RE_ENABLE_I18N. (re_string_reconstruct): Don't touch cur_state in case of not RE_ENABLE_I18N. * posix/regex_internal.h (attribute_hidden): New macro in case of not _LIBC. (re_charset_t): Define range_starts/ends in case of not _LIBC. * posix/regexec.c (sift_states_iter_mb): Hide in case of not RE_ENABLE_I18N. (transit_state_mb): Likewise. (check_node_accept_bytes): Implement the code evaluating range expression in case of not _LIBC. (find_collation_sequence_value): Hide in case of not _LIBC. 2002-04-26 Jakub Jelinek * sysdeps/unix/sysv/linux/sparc/sparc32/semctl.c: Copied from i386/semctl.c. (__old_semctl, __new_semctl): Only use va_arg if the argument will be used. --- ChangeLog | 30 ++++ posix/regcomp.c | 227 ++++++++++++++++++++----- posix/regex_internal.c | 9 +- posix/regex_internal.h | 8 +- posix/regexec.c | 123 +++++++++----- sysdeps/unix/sysv/linux/sparc/sparc32/semctl.c | 206 +++++++++++++++++++++- 6 files changed, 513 insertions(+), 90 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3e32c63816..36da37c016 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,33 @@ +2002-04-26 Isamu Hasegawa + + * posix/regcomp.c (re_compile_fastmap_iter): Fix fastmap in case of + not _LIBC and RE_ENABLE_I18N. + (build_range_exp): Implement for not _LIBC. + (build_collating_symbol): Likewise. + (parse_bracket_exp): Unify redundant error handlings. 
+ Don't erase mbcset for non matching list in multibyte envs. + (build_word_op): Add '_' to matching list for \w operator. + * posix/regex_internal.c (re_string_construct): Invoke + build_upper_buffer in case of not RE_ENABLE_I18N. + (re_string_reconstruct): Don't touch cur_state in case of not + RE_ENABLE_I18N. + * posix/regex_internal.h (attribute_hidden): New macro in case of + not _LIBC. + (re_charset_t): Define range_starts/ends in case of not _LIBC. + * posix/regexec.c (sift_states_iter_mb): Hide in case of not + RE_ENABLE_I18N. + (transit_state_mb): Likewise. + (check_node_accept_bytes): Implement the code evaluating range + expression in case of not _LIBC. + (find_collation_sequence_value): Hide in case of not _LIBC. + +2002-04-26 Jakub Jelinek + + * sysdeps/unix/sysv/linux/sparc/sparc32/semctl.c: Copied from + i386/semctl.c. + (__old_semctl, __new_semctl): Only use va_arg if the argument will + be used. + 2002-04-26 Ulrich Drepper * sysdeps/ia64/fpu/s_isinf.S: Fix definition of _internal names. 
diff --git a/posix/regcomp.c b/posix/regcomp.c index 149814cf98..59836b15e0 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -114,6 +114,16 @@ static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, re_token_t *token); +#ifndef _LIBC +static reg_errcode_t build_range_exp (re_charset_t *mbcset, + re_bitset_ptr_t sbcset, int *range_alloc, + bracket_elem_t *start_elem, + bracket_elem_t *end_elem); +static reg_errcode_t build_collating_symbol (re_charset_t *mbcset, + re_bitset_ptr_t sbcset, + int *coll_sym_alloc, + unsigned char *name); +#endif /* not _LIBC */ static reg_errcode_t build_equiv_class (re_charset_t *mbcset, re_bitset_ptr_t sbcset, int *equiv_class_alloc, @@ -354,7 +364,14 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) if (table[ch] < 0) fastmap[ch] = 1; } -#endif +#else +# ifdef RE_ENABLE_I18N + if (MB_CUR_MAX > 1) + for (i = 0; i < SBC_MAX; ++i) + if (__btowc (i) == WEOF) + fastmap[i] = 1; +# endif /* RE_ENABLE_I18N */ +#endif /* not _LIBC */ } for (i = 0; i < cset->nmbchars; ++i) { @@ -2207,6 +2224,136 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err) I'm not sure, but maybe enough. */ #define BRACKET_NAME_BUF_SIZE 32 +#ifndef _LIBC + /* Local function for parse_bracket_exp only used in case of NOT _LIBC. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. + RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + +static reg_errcode_t +build_range_exp (mbcset, sbcset, range_alloc, start_elem, end_elem) + re_charset_t *mbcset; + re_bitset_ptr_t sbcset; + int *range_alloc; + bracket_elem_t *start_elem, *end_elem; +{ + unsigned int start_ch, end_ch; + /* Equivalence Classes and Character Classes can't be a range start/end. 
*/ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + /* We can handle no multi character collating elements without libc + support. */ + if (BE ((start_elem->type == COLL_SYM && strlen (start_elem->opr.name) > 1) + || (end_elem->type == COLL_SYM && strlen (end_elem->opr.name) > 1), + 0)) + return REG_ECOLLATE; + +# ifdef RE_ENABLE_I18N + { + wchar_t wc, start_wc, end_wc; + wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + + start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? __btowc (start_ch) : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? __btowc (end_ch) : end_elem->opr.wch); + cmp_buf[0] = start_wc; + cmp_buf[4] = end_wc; + if (wcscoll (cmp_buf, cmp_buf + 4) > 0) + return REG_ERANGE; + + /* Check the space of the arrays. */ + if (*range_alloc == mbcset->nranges) + { + /* There are not enough space, need realloc. */ + wchar_t *new_array_start, *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. 
*/ + new_array_start = re_realloc (mbcset->range_starts, wchar_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, wchar_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_wc; + mbcset->range_ends[mbcset->nranges++] = end_wc; + + /* Build the table for single byte characters. */ + for (wc = 0; wc <= SBC_MAX; ++wc) + { + cmp_buf[2] = wc; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + bitset_set (sbcset, wc); + } + } +# else /* not RE_ENABLE_I18N */ + { + unsigned int ch; + start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + if (start_ch > end_ch) + return REG_ERANGE; + /* Build the table for single byte characters. */ + for (ch = 0; ch <= SBC_MAX; ++ch) + if (start_ch <= ch && ch <= end_ch) + bitset_set (sbcset, ch); + } +# endif /* not RE_ENABLE_I18N */ + return REG_NOERROR; +} +#endif /* not _LIBC */ + +#ifndef _LIBC +/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument since we may update it. 
*/ + +static reg_errcode_t +build_collating_symbol (mbcset, sbcset, coll_sym_alloc, name) + re_charset_t *mbcset; + re_bitset_ptr_t sbcset; + int *coll_sym_alloc; + unsigned char *name; +{ + if (BE (strlen (name) != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } +} +#endif /* not _LIBC */ + /* This function parse bracket expression like "[abc]", "[a-c]", "[[.a-a.]]" etc. */ @@ -2225,7 +2372,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) const int32_t *symb_table; const unsigned char *extra; - /* Local function for parse_bracket_exp. + /* Local function for parse_bracket_exp used in _LIBC environement. Seek the collating symbol entry correspondings to NAME. Return the index of the symbol in the SYMB_TABLE. */ @@ -2257,7 +2404,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) return elem; } - /* Local function for parse_bracket_exp. + /* Local function for parse_bracket_exp used in _LIBC environement. Look up the collation sequence value of BR_ELEM. Return the value if succeeded, UINT_MAX otherwise. */ @@ -2321,7 +2468,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) return UINT_MAX; } - /* Local function for parse_bracket_exp. + /* Local function for parse_bracket_exp used in _LIBC environement. Build the range expression which starts from START_ELEM, and ends at END_ELEM. The result are written to MBCSET and SBCSET. RANGE_ALLOC is the allocated size of mbcset->range_starts, and @@ -2364,6 +2511,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) *range_alloc = new_nranges; } + /* Equivalence Classes and Character Classes can't be a range + start/end. */ if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, 0)) @@ -2397,9 +2546,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) } return REG_NOERROR; } -#endif - /* Local function for parse_bracket_exp. 
+ /* Local function for parse_bracket_exp used in _LIBC environement. Build the collating element which is represented by NAME. The result are written to MBCSET and SBCSET. COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a @@ -2412,7 +2560,6 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) int *coll_sym_alloc; unsigned char *name; { -#ifdef _LIBC int32_t elem, idx; if (nrules != 0) { @@ -2452,7 +2599,6 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) return REG_NOERROR; } else -#endif { if (BE (strlen (name) != 1, 0)) return REG_ECOLLATE; @@ -2463,6 +2609,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) } } } +#endif + re_token_t br_token; re_bitset_ptr_t sbcset; re_charset_t *mbcset; @@ -2497,10 +2645,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) token_len = peek_token_bracket (token, regexp, syntax); if (BE (token->type == END_OF_RE, 0)) { - re_free (sbcset); - free_charset (mbcset); *err = REG_BADPAT; - return NULL; + goto parse_bracket_exp_free_return; } if (token->type == OP_NON_MATCH_LIST) { @@ -2512,10 +2658,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) token_len = peek_token_bracket (token, regexp, syntax); if (BE (token->type == END_OF_RE, 0)) { - re_free (sbcset); - free_charset (mbcset); *err = REG_BADPAT; - return NULL; + goto parse_bracket_exp_free_return; } if (MB_CUR_MAX > 1) for (i = 0; i < SBC_MAX; ++i) @@ -2541,19 +2685,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) syntax); if (BE (ret != REG_NOERROR, 0)) { - re_free (sbcset); - free_charset (mbcset); *err = ret; - return NULL; + goto parse_bracket_exp_free_return; } token_len = peek_token_bracket (token, regexp, syntax); if (BE (token->type == END_OF_RE, 0)) { - re_free (sbcset); - free_charset (mbcset); *err = REG_BADPAT; - return NULL; + goto parse_bracket_exp_free_return; } if (token->type == OP_CHARSET_RANGE) { @@ -2561,10 +2701,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) token_len2 = peek_token_bracket (&token2, 
regexp, syntax); if (BE (token->type == END_OF_RE, 0)) { - re_free (sbcset); - free_charset (mbcset); *err = REG_BADPAT; - return NULL; + goto parse_bracket_exp_free_return; } if (token2.type == OP_CLOSE_BRACKET) { @@ -2583,28 +2721,20 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) dfa, syntax); if (BE (ret != REG_NOERROR, 0)) { - re_free (sbcset); - free_charset (mbcset); *err = ret; - return NULL; + goto parse_bracket_exp_free_return; } token_len = peek_token_bracket (token, regexp, syntax); if (BE (token->type == END_OF_RE, 0)) { - re_free (sbcset); - free_charset (mbcset); *err = REG_BADPAT; - return NULL; + goto parse_bracket_exp_free_return; } *err = build_range_exp (mbcset, sbcset, &range_alloc, &start_elem, &end_elem); if (BE (*err != REG_NOERROR, 0)) - { - re_free (sbcset); - free_charset (mbcset); - return NULL; - } + goto parse_bracket_exp_free_return; } else { @@ -2632,21 +2762,13 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) *err = build_equiv_class (mbcset, sbcset, &equiv_class_alloc, start_elem.opr.name); if (BE (*err != REG_NOERROR, 0)) - { - re_free (sbcset); - free_charset (mbcset); - return NULL; - } + goto parse_bracket_exp_free_return; break; case COLL_SYM: *err = build_collating_symbol (mbcset, sbcset, &coll_sym_alloc, start_elem.opr.name); if (BE (*err != REG_NOERROR, 0)) - { - re_free (sbcset); - free_charset (mbcset); - return NULL; - } + goto parse_bracket_exp_free_return; break; case CHAR_CLASS: ret = build_charclass (mbcset, sbcset, &char_class_alloc, @@ -2678,7 +2800,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) goto parse_bracket_exp_espace; if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes - || mbcset->nranges || (mbcset->nchar_classes && MB_CUR_MAX > 1)) + || mbcset->nranges || (MB_CUR_MAX > 1 && (mbcset->nchar_classes + || mbcset->non_match))) { re_token_t alt_token; bin_tree_t *mbc_tree; @@ -2704,11 +2827,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) } 
parse_bracket_exp_espace: - free_charset (mbcset); *err = REG_ESPACE; + parse_bracket_exp_free_return: + re_free (sbcset); + free_charset (mbcset); return NULL; } +/* Parse an element in the bracket expression. */ + static reg_errcode_t parse_bracket_element (elem, regexp, token, token_len, dfa, syntax) bracket_elem_t *elem; @@ -2738,6 +2865,10 @@ parse_bracket_element (elem, regexp, token, token_len, dfa, syntax) return REG_NOERROR; } +/* Parse a bracket symbol in the bracket expression. Bracket symbols are + such as [:<character_class>:], [.<collating_element>.], and + [=<equivalent_class>=]. */ + static reg_errcode_t parse_bracket_symbol (elem, regexp, token) bracket_elem_t *elem; @@ -2968,10 +3099,12 @@ build_word_op (dfa, not, err) if (syntax & RE_HAT_LISTS_NOT_NEWLINE) bitset_set(cset->sbcset, '\0'); */ +#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) for (i = 0; i < SBC_MAX; ++i) if (__btowc (i) == WEOF) bitset_set (sbcset, i); +#endif /* RE_ENABLE_I18N */ } /* We don't care the syntax in this case. */ @@ -2983,6 +3116,8 @@ build_word_op (dfa, not, err) *err = REG_ESPACE; return NULL; } + /* \w also matches '_'. */ + bitset_set (sbcset, '_'); /* If it is non-matching list. 
*/ if (mbcset->non_match) diff --git a/posix/regex_internal.c b/posix/regex_internal.c index b688d0f7d9..5327c265c2 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -60,7 +60,9 @@ static void re_string_construct_common (const unsigned char *str, int len, re_string_t *pstr, RE_TRANSLATE_TYPE trans, int icase); +#ifdef RE_ENABLE_I18N static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx); +#endif /* RE_ENABLE_I18N */ static re_dfastate_t *create_newstate_common (re_dfa_t *dfa, const re_node_set *nodes, unsigned int hash); @@ -134,8 +136,8 @@ re_string_construct (pstr, str, len, trans, icase) if (MB_CUR_MAX > 1) build_wcs_upper_buffer (pstr); else - build_upper_buffer (pstr); #endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); } else { @@ -409,7 +411,10 @@ re_string_reconstruct (pstr, idx, eflags, newline) if (offset < 0) { /* Reset buffer. */ - memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); +#ifdef RE_ENABLE_I18N + if (MB_CUR_MAX > 1) + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); +#endif /* RE_ENABLE_I18N */ pstr->valid_len = pstr->raw_mbs_idx = 0; pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF : CONTEXT_NEWLINE | CONTEXT_BEGBUF); diff --git a/posix/regex_internal.h b/posix/regex_internal.h index f676ae2746..75cc81517b 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -37,7 +37,8 @@ # define __iswctype iswctype # define __btowc btowc # define __mempcpy memcpy -#endif +# define attribute_hidden +#endif /* not _LIBC */ extern const char __re_error_msgid[] attribute_hidden; extern const size_t __re_error_msgid_idx[] attribute_hidden; @@ -161,7 +162,10 @@ typedef struct #ifdef _LIBC uint32_t *range_starts; uint32_t *range_ends; -#endif +#else /* not _LIBC */ + wchar_t *range_starts; + wchar_t *range_ends; +#endif /* not _LIBC */ int nranges; /* Character classes. 
*/ diff --git a/posix/regexec.c b/posix/regexec.c index e888970936..2c7a2774eb 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -64,9 +64,11 @@ static int proceed_next_node (const regex_t *preg, static reg_errcode_t set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, int last); +#ifdef RE_ENABLE_I18N static int sift_states_iter_mb (const regex_t *preg, const re_match_context_t *mctx, int node_idx, int str_idx, int max_str_idx); +#endif /* RE_ENABLE_I18N */ static int sift_states_iter_bkref (const re_dfa_t *dfa, re_dfastate_t **state_log, struct re_backref_cache_entry *mctx_entry, @@ -88,9 +90,11 @@ static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg, re_dfastate_t *pstate, int fl_search, re_match_context_t *mctx); +#ifdef RE_ENABLE_I18N static reg_errcode_t transit_state_mb (const regex_t *preg, re_dfastate_t *pstate, re_match_context_t *mctx); +#endif /* RE_ENABLE_I18N */ static reg_errcode_t transit_state_bkref (const regex_t *preg, re_dfastate_t *pstate, re_match_context_t *mctx); @@ -101,10 +105,14 @@ static reg_errcode_t transit_state_bkref_loop (const regex_t *preg, static re_dfastate_t **build_trtable (const regex_t *dfa, const re_dfastate_t *state, int fl_search); +#ifdef RE_ENABLE_I18N static int check_node_accept_bytes (const regex_t *preg, int node_idx, const re_string_t *input, int idx); +# ifdef _LIBC static unsigned int find_collation_sequence_value (const unsigned char *mbs, size_t name_len); +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ static int group_nodes_into_DFAstates (const regex_t *dfa, const re_dfastate_t *state, re_node_set *states_node, @@ -912,9 +920,12 @@ proceed_next_node (preg, mctx, pidx, node, eps_via_nodes) type = dfa->nodes[entity].type; } +#ifdef RE_ENABLE_I18N if (ACCEPT_MB_NODE (type)) naccepted = check_node_accept_bytes (preg, entity, mctx->input, *pidx); - else if (type == OP_BACK_REF) + else +#endif /* RE_ENABLE_I18N */ + if (type == 
OP_BACK_REF) { for (i = 0; i < mctx->nbkref_ents; ++i) { @@ -1121,13 +1132,16 @@ sift_states_backward (preg, mctx, last_node) type = dfa->nodes[entity].type; } +#ifdef RE_ENABLE_I18N /* If the node may accept `multi byte'. */ if (ACCEPT_MB_NODE (type)) naccepted = sift_states_iter_mb (preg, mctx, entity, str_idx, mctx->match_last); /* If the node is a back reference. */ - else if (type == OP_BACK_REF) + else +#endif /* RE_ENABLE_I18N */ + if (type == OP_BACK_REF) for (j = 0; j < mctx->nbkref_ents; ++j) { naccepted = sift_states_iter_bkref (dfa, mctx->state_log, @@ -1201,6 +1215,7 @@ clean_state_log_if_need (mctx, next_state_log_idx) return REG_NOERROR; } +#ifdef RE_ENABLE_I18N static int sift_states_iter_mb (preg, mctx, node_idx, str_idx, max_str_idx) const regex_t *preg; @@ -1222,6 +1237,7 @@ sift_states_iter_mb (preg, mctx, node_idx, str_idx, max_str_idx) `naccepted' bytes input. */ return naccepted; } +#endif /* RE_ENABLE_I18N */ static int sift_states_iter_bkref (dfa, state_log, mctx_entry, node_idx, idx, match_last) @@ -1317,6 +1333,7 @@ transit_state (err, preg, mctx, state, fl_search) } else { +#ifdef RE_ENABLE_I18N /* If the current state can accept multibyte. */ if (state->accept_mb) { @@ -1324,6 +1341,7 @@ transit_state (err, preg, mctx, state, fl_search) if (BE (*err != REG_NOERROR, 0)) return NULL; } +#endif /* RE_ENABLE_I18N */ /* Then decide the next state with the single byte. */ if (1) @@ -1474,6 +1492,7 @@ transit_state_sb (err, preg, state, fl_search, mctx) return next_state; } +#ifdef RE_ENABLE_I18N static reg_errcode_t transit_state_mb (preg, pstate, mctx) const regex_t *preg; @@ -1543,6 +1562,7 @@ transit_state_mb (preg, pstate, mctx) } return REG_NOERROR; } +#endif /* RE_ENABLE_I18N */ static reg_errcode_t transit_state_bkref (preg, pstate, mctx) @@ -1991,7 +2011,14 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) return ndests; } -/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. 
*/ +#ifdef RE_ENABLE_I18N +/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. + Return the number of the bytes the node accepts. + STR_IDX is the current index of the input string. + + This function handles the nodes which can accept one character, or + one collating element like '.', '[a-z]', opposite to the other nodes + can only accept one byte. */ static int check_node_accept_bytes (preg, node_idx, input, str_idx) @@ -2003,14 +2030,16 @@ check_node_accept_bytes (preg, node_idx, input, str_idx) const re_token_t *node = dfa->nodes + node_idx; int elem_len = re_string_elem_size_at (input, str_idx); int char_len = re_string_char_size_at (input, str_idx); - int i, j; -#ifdef _LIBC + int i; +# ifdef _LIBC + int j; uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); -#endif /* _LIBC */ +# endif /* _LIBC */ if (elem_len <= 1 && char_len <= 1) return 0; if (node->type == OP_PERIOD) { + /* '.' accepts any one character except the following two cases. */ if ((!(preg->syntax & RE_DOT_NEWLINE) && re_string_byte_at (input, str_idx) == '\n') || ((preg->syntax & RE_DOT_NOT_NULL) && @@ -2021,18 +2050,40 @@ check_node_accept_bytes (preg, node_idx, input, str_idx) else if (node->type == COMPLEX_BRACKET) { const re_charset_t *cset = node->opr.mbcset; +# ifdef _LIBC const unsigned char *pin = re_string_get_buffer (input) + str_idx; -#ifdef _LIBC +# endif /* _LIBC */ + int match_len = 0; + wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) + ? re_string_wchar_at (input, str_idx) : 0); + + /* match with multibyte character? */ + for (i = 0; i < cset->nmbchars; ++i) + if (wc == cset->mbchars[i]) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + /* match with character_class? 
*/ + for (i = 0; i < cset->nchar_classes; ++i) + { + wctype_t wt = cset->char_classes[i]; + if (__iswctype (wc, wt)) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + +# ifdef _LIBC if (nrules != 0) { - int match_len = 0; unsigned int in_collseq = 0; const int32_t *table, *indirect; const unsigned char *weights, *extra, *collseqwc; int32_t idx; - wchar_t wc = 0; /* This #include defines a local function! */ -# include +# include /* match with collating_symbol? */ if (cset->ncoll_syms) @@ -2057,9 +2108,6 @@ check_node_accept_bytes (preg, node_idx, input, str_idx) } } - if (cset->nranges || cset->nchar_classes || cset->nmbchars) - wc = re_string_wchar_at (input, str_idx); - if (cset->nranges) { if (elem_len <= char_len) @@ -2112,43 +2160,39 @@ check_node_accept_bytes (preg, node_idx, input, str_idx) } } } - - /* match with multibyte character? */ - for (i = 0; i < cset->nmbchars; ++i) - if (wc == cset->mbchars[i]) - { - match_len = char_len; - goto check_node_accept_bytes_match; - } - - /* match with character_class? */ - for (i = 0; i < cset->nchar_classes; ++i) + } + else +# endif /* _LIBC */ + { + /* match with range expression? */ + wchar_t cmp_buf[6] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; + for (i = 0; i < cset->nranges; ++i) { - wctype_t wt = cset->char_classes[i]; - if (__iswctype (wc, wt)) + cmp_buf[0] = cset->range_starts[i]; + cmp_buf[4] = cset->range_ends[i]; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) { match_len = char_len; goto check_node_accept_bytes_match; } } - - check_node_accept_bytes_match: - if (!cset->non_match) - return match_len; + } + check_node_accept_bytes_match: + if (!cset->non_match) + return match_len; + else + { + if (match_len > 0) + return 0; else - { - if (match_len > 0) - return 0; - else - return re_string_elem_size_at (input, str_idx); - } + return (elem_len > char_len) ? 
elem_len : char_len; } -#endif } return 0; } -#ifdef _LIBC +# ifdef _LIBC static unsigned int find_collation_sequence_value (mbs, mbs_len) const unsigned char *mbs; @@ -2204,7 +2248,8 @@ find_collation_sequence_value (mbs, mbs_len) } } } -#endif +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ /* Check whether the node accepts the byte which is IDX-th byte of the INPUT. */ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/semctl.c b/sysdeps/unix/sysv/linux/sparc/sparc32/semctl.c index e9b1a483c9..ed6957f5d9 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/semctl.c +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/semctl.c @@ -1 +1,205 @@ -#include +/* Semctl for architectures where word sized unions are passed indirectly + Copyright (C) 1995, 1997, 1998, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , August 1995. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include <errno.h> +#include <stdarg.h> +#include <sys/sem.h> +#include <ipc_priv.h> + +#include <sysdep.h> +#include <string.h> +#include <sys/syscall.h> + +#include "kernel-features.h" +#include <shlib-compat.h> + +struct __old_semid_ds +{ + struct __old_ipc_perm sem_perm; /* operation permission struct */ + __time_t sem_otime; /* last semop() time */ + __time_t sem_ctime; /* last time changed by semctl() */ + struct sem *__sembase; /* ptr to first semaphore in array */ + struct sem_queue *__sem_pending; /* pending operations */ + struct sem_queue *__sem_pending_last; /* last pending operation */ + struct sem_undo *__undo; /* undo requests on this array */ + unsigned short int sem_nsems; /* number of semaphores in set */ +}; + +/* Define a `union semun' suitable for Linux here. */ +union semun +{ + int val; /* value for SETVAL */ + struct semid_ds *buf; /* buffer for IPC_STAT & IPC_SET */ + unsigned short int *array; /* array for GETALL & SETALL */ + struct seminfo *__buf; /* buffer for IPC_INFO */ +}; + +#include <bp-checks.h> +#include <bp-semctl.h> /* definition of CHECK_SEMCTL needs union semun */ + +#ifdef __NR_getuid32 +# if __ASSUME_32BITUIDS == 0 +/* This variable is shared with all files that need to check for 32bit + uids. */ +extern int __libc_missing_32bit_uids; +# endif +#endif + +/* Perform the semaphore control operation CMD on the semaphore set + SEMID. */ +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_2) +int __old_semctl (int semid, int semnum, int cmd, ...); +#endif +int __new_semctl (int semid, int semnum, int cmd, ...); + +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_2) +int +__old_semctl (int semid, int semnum, int cmd, ...) +{ + union semun arg; + va_list ap; + + /* Get the argument only if required. 
*/ + arg.buf = NULL; + switch (cmd) + { + case SETVAL: /* arg.val */ + case GETALL: /* arg.array */ + case SETALL: + case IPC_STAT: /* arg.buf */ + case IPC_SET: + case SEM_STAT: + case IPC_INFO: /* arg.__buf */ + case SEM_INFO: + va_start (ap, cmd); + arg = va_arg (ap, union semun); + va_end (ap); + break; + } + + return INLINE_SYSCALL (ipc, 5, IPCOP_semctl, semid, semnum, cmd, + CHECK_SEMCTL (&arg, semid, cmd)); +} +compat_symbol (libc, __old_semctl, semctl, GLIBC_2_0); +#endif + +int +__new_semctl (int semid, int semnum, int cmd, ...) +{ + union semun arg; + va_list ap; + + /* Get the argument only if required. */ + arg.buf = NULL; + switch (cmd) + { + case SETVAL: /* arg.val */ + case GETALL: /* arg.array */ + case SETALL: + case IPC_STAT: /* arg.buf */ + case IPC_SET: + case SEM_STAT: + case IPC_INFO: /* arg.__buf */ + case SEM_INFO: + va_start (ap, cmd); + arg = va_arg (ap, union semun); + va_end (ap); + break; + } + +#if __ASSUME_32BITUIDS > 0 + return INLINE_SYSCALL (ipc, 5, IPCOP_semctl, semid, semnum, cmd | __IPC_64, + CHECK_SEMCTL (&arg, semid, cmd | __IPC_64)); +#else + switch (cmd) { + case SEM_STAT: + case IPC_STAT: + case IPC_SET: + break; + default: + return INLINE_SYSCALL (ipc, 5, IPCOP_semctl, semid, semnum, cmd, + CHECK_SEMCTL (&arg, semid, cmd)); + } + + { + int result; + struct __old_semid_ds old; + struct semid_ds *buf; + +#ifdef __NR_getuid32 + if (__libc_missing_32bit_uids <= 0) + { + if (__libc_missing_32bit_uids < 0) + { + int save_errno = errno; + + /* Test presence of new IPC by testing for getuid32 syscall. 
*/ + result = INLINE_SYSCALL (getuid32, 0); + if (result == -1 && errno == ENOSYS) + __libc_missing_32bit_uids = 1; + else + __libc_missing_32bit_uids = 0; + __set_errno(save_errno); + } + if (__libc_missing_32bit_uids <= 0) + { + result = INLINE_SYSCALL (ipc, 5, IPCOP_semctl, semid, semnum, cmd | __IPC_64, + CHECK_SEMCTL (&arg, semid, cmd | __IPC_64)); + return result; + } + } +#endif + + buf = arg.buf; + arg.buf = (struct semid_ds *)&old; + if (cmd == IPC_SET) + { + old.sem_perm.uid = buf->sem_perm.uid; + old.sem_perm.gid = buf->sem_perm.gid; + old.sem_perm.mode = buf->sem_perm.mode; + if (old.sem_perm.uid != buf->sem_perm.uid || + old.sem_perm.gid != buf->sem_perm.gid) + { + __set_errno (EINVAL); + return -1; + } + } + result = INLINE_SYSCALL (ipc, 5, IPCOP_semctl, semid, semnum, cmd, + CHECK_SEMCTL (&arg, semid, cmd)); + if (result != -1 && cmd != IPC_SET) + { + memset(buf, 0, sizeof(*buf)); + buf->sem_perm.__key = old.sem_perm.__key; + buf->sem_perm.uid = old.sem_perm.uid; + buf->sem_perm.gid = old.sem_perm.gid; + buf->sem_perm.cuid = old.sem_perm.cuid; + buf->sem_perm.cgid = old.sem_perm.cgid; + buf->sem_perm.mode = old.sem_perm.mode; + buf->sem_perm.__seq = old.sem_perm.__seq; + buf->sem_otime = old.sem_otime; + buf->sem_ctime = old.sem_ctime; + buf->sem_nsems = old.sem_nsems; + } + return result; + } +#endif +} + +versioned_symbol (libc, __new_semctl, semctl, GLIBC_2_2); -- cgit v1.2.3