From c2a150d089fa096cb5f9e342da80fb30dc0d1953 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Tue, 29 Dec 2020 17:32:25 -0300 Subject: posix: Sync regex code with gnulib It sync with gnulib commit 43ee1a6bf. The main change is 9682f18e9. (which does not have a meaniful description). Checked on x86_64-linux-gnu. --- posix/regcomp.c | 2 +- posix/regex.h | 17 ++++++++++++----- posix/regex_internal.c | 19 ++++++++++--------- posix/regex_internal.h | 16 ++++++++++++---- 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/posix/regcomp.c b/posix/regcomp.c index e28e3701db..d93698ae78 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -558,7 +558,7 @@ weak_alias (__regerror, regerror) static const bitset_t utf8_sb_map = { /* Set the first 128 bits. */ -# if defined __GNUC__ && !defined __STRICT_ANSI__ +# if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__ [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX # else # if 4 * BITSET_WORD_BITS < ASCII_CHARS diff --git a/posix/regex.h b/posix/regex.h index bdc08ec720..8e4ef45578 100644 --- a/posix/regex.h +++ b/posix/regex.h @@ -612,7 +612,9 @@ extern int re_exec (const char *); 'configure' might #define 'restrict' to those words, so pick a different name. */ #ifndef _Restrict_ -# if defined __restrict || 2 < __GNUC__ + (95 <= __GNUC_MINOR__) +# if defined __restrict \ + || 2 < __GNUC__ + (95 <= __GNUC_MINOR__) \ + || __clang_major__ >= 3 # define _Restrict_ __restrict # elif 199901L <= __STDC_VERSION__ || defined restrict # define _Restrict_ restrict @@ -620,13 +622,18 @@ extern int re_exec (const char *); # define _Restrict_ # endif #endif -/* For [restrict], use glibc's __restrict_arr if available. - Otherwise, GCC 3.1 (not in C++ mode) and C99 support [restrict]. */ +/* For the ISO C99 syntax + array_name[restrict] + use glibc's __restrict_arr if available. + Otherwise, GCC 3.1 and clang support this syntax (but not in C++ mode). + Other ISO C99 compilers support it as well. */ #ifndef _Restrict_arr_ # ifdef __restrict_arr # define _Restrict_arr_ __restrict_arr -# elif ((199901L <= __STDC_VERSION__ || 3 < __GNUC__ + (1 <= __GNUC_MINOR__)) \ - && !defined __GNUG__) +# elif ((199901L <= __STDC_VERSION__ \ + || 3 < __GNUC__ + (1 <= __GNUC_MINOR__) \ + || __clang_major__ >= 3) \ + && !defined __cplusplus) # define _Restrict_arr_ _Restrict_ # else # define _Restrict_arr_ diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 1351c46023..9dd387ef85 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -300,18 +300,20 @@ build_wcs_upper_buffer (re_string_t *pstr) while (byte_idx < end_idx) { wchar_t wc; + unsigned char ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) - && mbsinit (&pstr->cur_state)) + if (isascii (ch) && mbsinit (&pstr->cur_state)) { - /* In case of a singlebyte character. */ - pstr->mbs[byte_idx] - = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); /* The next step uses the assumption that wchar_t is encoded ASCII-safe: all ASCII values can be converted like this. */ - pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; - ++byte_idx; - continue; + wchar_t wcu = __towupper (ch); + if (isascii (wcu)) + { + pstr->mbs[byte_idx] = wcu; + pstr->wcs[byte_idx] = wcu; + byte_idx++; + continue; + } } remain_len = end_idx - byte_idx; @@ -348,7 +350,6 @@ build_wcs_upper_buffer (re_string_t *pstr) { /* It is an invalid character, an incomplete character at the end of the string, or '\0'. Just use the byte. */ - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; pstr->mbs[byte_idx] = ch; /* And also cast it to wide char. */ pstr->wcs[byte_idx++] = (wchar_t) ch; diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 268ef05c38..e31ac92674 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -77,6 +77,14 @@ # define isblank(ch) ((ch) == ' ' || (ch) == '\t') #endif +/* regex code assumes isascii has its usual numeric meaning, + even if the portable character set uses EBCDIC encoding, + and even if wint_t is wider than int. */ +#ifndef _LIBC +# undef isascii +# define isascii(c) (((c) & ~0x7f) == 0) +#endif + #ifdef _LIBC # ifndef _RE_DEFINE_LOCALE_FUNCTIONS # define _RE_DEFINE_LOCALE_FUNCTIONS 1 @@ -335,7 +343,7 @@ typedef struct Idx idx; /* for BACK_REF */ re_context_type ctx_type; /* for ANCHOR */ } opr; -#if __GNUC__ >= 2 && !defined __STRICT_ANSI__ +#if (__GNUC__ >= 2 || defined __clang__) && !defined __STRICT_ANSI__ re_token_type_t type : 8; #else re_token_type_t type; @@ -841,10 +849,10 @@ re_string_elem_size_at (const re_string_t *pstr, Idx idx) #endif /* RE_ENABLE_I18N */ #ifndef FALLTHROUGH -# if __GNUC__ < 7 -# define FALLTHROUGH ((void) 0) -# else +# if (__GNUC__ >= 7) || (__clang_major__ >= 10) # define FALLTHROUGH __attribute__ ((__fallthrough__)) +# else +# define FALLTHROUGH ((void) 0) # endif #endif -- cgit v1.2.3