From eb04c21373e2a2885f3d52ff192b0499afe3c672 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Wed, 20 Dec 2017 09:47:44 -0200 Subject: posix: Sync gnulib regex implementation This patch syncs the regex implementation with gnulib (commit 0ee5212). Only two changes in GLIBC regex testing are required: 1. posix/bug-regex28.c: as previously discussed [1], the change of expected results on the pattern should be safe. 2. posix/PCRE.tests: the ERE (a)|\1 is malformed (in the sense that the \1 doesn't mean anything) and, although current GLIBC accepts it, it has undefined behavior. This patch removes the specific test. This sync contains some patches from the thread 'Regex: Make libc regex more usable outside GLIBC.' [2] which have been pushed upstream in gnulib. This patch also fixes some regex issues (BZ #23233, BZ #21163, BZ #18986, BZ #13762). I did not add testcases for #23233 and #13762 because I couldn't think of a simple way to trigger the expected failure paths. Checked on x86_64-linux-gnu and i686-linux-gnu. [BZ #23233] [BZ #21163] [BZ #18986] [BZ #13762] * posix/Makefile (tests): Add bug-regex37 and bug-regex38. * posix/PCRE.tests: Remove invalid test. * posix/bug-regex28.c: Fix expected values for used syntax. * posix/bug-regex37.c: New file. * posix/bug-regex38.c: Likewise. * posix/regcomp.c: Sync with gnulib. * posix/regex.c: Likewise. * posix/regex.h: Likewise. * posix/regex_internal.c: Likewise. * posix/regex_internal.h: Likewise. * posix/regexec.c: Likewise. 
[1] https://sourceware.org/ml/libc-alpha/2017-12/msg00807.html [2] https://sourceware.org/ml/libc-alpha/2017-12/msg00237.html --- posix/bug-regex28.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'posix/bug-regex28.c') diff --git a/posix/bug-regex28.c b/posix/bug-regex28.c index 5353edf373..ba263b27c8 100644 --- a/posix/bug-regex28.c +++ b/posix/bug-regex28.c @@ -21,18 +21,22 @@ #include #include +#include +#include + struct tests { const char *regex; const char *string; reg_syntax_t syntax; int retval; -} tests[] = { +}; +static const struct tests tests[] = { #define EGREP RE_SYNTAX_EGREP #define EGREP_NL (RE_SYNTAX_EGREP | RE_DOT_NEWLINE) & ~RE_HAT_LISTS_NOT_NEWLINE - { "a.b", "a\nb", EGREP, -1 }, + { "a.b", "a\nb", EGREP, 0 }, { "a.b", "a\nb", EGREP_NL, 0 }, - { "a[^x]b", "a\nb", EGREP, -1 }, + { "a[^x]b", "a\nb", EGREP, 0 }, { "a[^x]b", "a\nb", EGREP_NL, 0 }, /* While \S and \W are internally handled as [^[:space:]] and [^[:alnum:]_], RE_HAT_LISTS_NOT_NEWLINE did not make any difference, so ensure @@ -42,33 +46,33 @@ struct tests { "a\\Wb", "a\nb", EGREP, 0 }, { "a\\Wb", "a\nb", EGREP_NL, 0 } }; +static const size_t tests_size = sizeof (tests) / sizeof (tests[0]); -int -main (void) +static int +do_test (void) { struct re_pattern_buffer r; - size_t i; - int ret = 0; - for (i = 0; i < sizeof (tests) / sizeof (tests[i]); ++i) + for (size_t i = 0; i < tests_size; i++) { re_set_syntax (tests[i].syntax); memset (&r, 0, sizeof (r)); - if (re_compile_pattern (tests[i].regex, strlen (tests[i].regex), &r)) - { - printf ("re_compile_pattern %zd failed\n", i); - ret = 1; - continue; - } + const char *re = re_compile_pattern (tests[i].regex, + strlen (tests[i].regex), &r); + TEST_VERIFY (re == NULL); + if (re != NULL) + continue; + size_t len = strlen (tests[i].string); int rv = re_search (&r, tests[i].string, len, 0, len, NULL); - if (rv != tests[i].retval) - { - printf ("re_search %zd unexpected value %d 
!= %d\n", - i, rv, tests[i].retval); - ret = 1; - } + TEST_VERIFY (rv == tests[i].retval); + if (test_verbose > 0) + printf ("info: i=%zu rv=%d expected=%d\n", i, rv, tests[i].retval); + regfree (&r); } - return ret; + + return 0; } + +#include -- cgit v1.2.3