From 81c64d407c82933f18dc09de9bf58cc76d6a6148 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 10 Jul 2002 23:09:16 +0000 Subject: Update. 2002-07-10 Ulrich Drepper * Versions.def [libpthread]: Add GLIBC_2.2.6. * posix/Versions [libc] (GLIBC_2.2.6): Add __nanosleep. 2002-07-06 Bruno Haible * sysdeps/unix/sysv/sysv4/bits/sigset.h (__NSSBITS): Correct value. * sysdeps/unix/sysv/linux/bits/statvfs.h (ST_NODIRATIME): Set to 2048. --- ChangeLog | 12 ++++-- Versions.def | 2 + libio/Makefile | 3 +- libio/freopen.c | 27 ++++++++---- libio/freopen64.c | 10 ++++- libio/stdio.h | 9 ++-- libio/tst-freopen.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++ linuxthreads_db/ChangeLog | 4 ++ linuxthreads_db/Versions | 3 ++ posix/Makefile | 3 +- posix/Versions | 4 ++ posix/bug-regex4.c | 1 - posix/bug-regex6.c | 74 +++++++++++++++++++++++++++++++++ posix/regcomp.c | 60 +++++++++++++++------------ posix/regex_internal.h | 45 +++++++++++++------- posix/regexec.c | 94 +++++++++++++++++++++++------------------- 16 files changed, 350 insertions(+), 104 deletions(-) create mode 100644 libio/tst-freopen.c create mode 100644 posix/bug-regex6.c diff --git a/ChangeLog b/ChangeLog index 8c605c478b..6bc44d7ff6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,11 @@ -2002-07-08 H.J. Lu +2002-07-10 Ulrich Drepper - * sysdeps/unix/sysv/linux/mips/sys/shm.h: New. + * Versions.def [libpthread]: Add GLIBC_2.2.6. + * posix/Versions [libc] (GLIBC_2.2.6): Add __nanosleep. + +2002-07-06 Bruno Haible + + * sysdeps/unix/sysv/sysv4/bits/sigset.h (__NSSBITS): Correct value. 2002-07-06 Bruno Haible @@ -18,8 +23,7 @@ * sysdeps/generic/bits/stropts.h: Protect against direct inclusion. * sysdeps/generic/bits/ustat.h: Likewise. - * sysdeps/unix/sysv/linux/bits/statvfs.h (ST_NODIRATIME): Set to - 2048. + * sysdeps/unix/sysv/linux/bits/statvfs.h (ST_NODIRATIME): Set to 2048. 
2002-07-08 Andreas Jaeger diff --git a/Versions.def b/Versions.def index e5be6a86a2..6537c37a16 100644 --- a/Versions.def +++ b/Versions.def @@ -66,6 +66,7 @@ libpthread { GLIBC_2.1.2 GLIBC_2.2 GLIBC_2.2.3 + GLIBC_2.2.6 GLIBC_PRIVATE } libresolv { @@ -89,6 +90,7 @@ ld { libthread_db { GLIBC_2.1.3 GLIBC_2.2.3 + GLIBC_2.3 } libanl { GLIBC_2.2.3 diff --git a/libio/Makefile b/libio/Makefile index 59948668b2..f4c5095e5f 100644 --- a/libio/Makefile +++ b/libio/Makefile @@ -49,7 +49,8 @@ routines := \ tests = tst_swprintf tst_wprintf tst_swscanf tst_wscanf tst_getwc tst_putwc \ tst_wprintf2 tst-widetext test-fmemopen tst-ext tst-fopenloc \ tst-fgetws tst-ungetwc1 tst-ungetwc2 tst-swscanf tst-sscanf \ - tst-mmap-setvbuf bug-ungetwc1 bug-ungetwc2 tst-atime tst-eof + tst-mmap-setvbuf bug-ungetwc1 bug-ungetwc2 tst-atime tst-eof \ + tst-freopen test-srcs = test-freopen all: # Make this the default target; it will be defined in Rules. diff --git a/libio/freopen.c b/libio/freopen.c index a38313c334..01816a179a 100644 --- a/libio/freopen.c +++ b/libio/freopen.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1993,95,96,97,98,2000,2001 Free Software Foundation, Inc. +/* Copyright (C) 1993,95,96,97,98,2000,2001,2002 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -53,15 +53,26 @@ freopen (filename, mode, fp) } #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1) if (&_IO_stdin_used == NULL) - /* If the shared C library is used by the application binary which - was linked against the older version of libio, we just use the - older one even for internal use to avoid trouble since a pointer - to the old libio may be passed into shared C library and wind - up here. 
*/ - result = _IO_old_freopen (filename, mode, fp); + { + /* If the shared C library is used by the application binary which + was linked against the older version of libio, we just use the + older one even for internal use to avoid trouble since a pointer + to the old libio may be passed into shared C library and wind + up here. */ + _IO_old_file_close_it (fp); + _IO_JUMPS ((struct _IO_FILE_plus *) fp) = &_IO_old_file_jumps; + result = _IO_old_file_fopen (fp, filename, mode); + } else #endif - result = _IO_freopen (filename, mode, fp); + { + INTUSE(_IO_file_close_it) (fp); + _IO_JUMPS ((struct _IO_FILE_plus *) fp) = &INTUSE(_IO_file_jumps); + fp->_wide_data->_wide_vtable = &INTUSE(_IO_wfile_jumps); + result = INTUSE(_IO_file_fopen) (fp, filename, mode, 1); + if (result != NULL) + result = __fopen_maybe_mmap (result); + } if (result != NULL) /* unbound stream orientation */ result->_mode = 0; diff --git a/libio/freopen64.c b/libio/freopen64.c index 941eda0aa9..7ab9bd315a 100644 --- a/libio/freopen64.c +++ b/libio/freopen64.c @@ -1,4 +1,5 @@ -/* Copyright (C) 1993,1995,1996,1997,1998,2000,2001 Free Software Foundation, Inc. +/* Copyright (C) 1993,1995,1996,1997,1998,2000,2001,2002 + Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -51,7 +52,12 @@ freopen64 (filename, mode, fp) if (fd != -1) filename = fd_to_filename (fd); } - result = _IO_freopen64 (filename, mode, fp); + INTUSE(_IO_file_close_it) (fp); + _IO_JUMPS ((struct _IO_FILE_plus *) fp) = &INTUSE(_IO_file_jumps); + fp->_wide_data->_wide_vtable = &INTUSE(_IO_wfile_jumps); + result = INTUSE(_IO_file_fopen) (fp, filename, mode, 0); + if (result != NULL) + result = __fopen_maybe_mmap (result); if (result != NULL) /* unbound stream orientation */ result->_mode = 0; diff --git a/libio/stdio.h b/libio/stdio.h index 47f80d88c8..bcdf64d72e 100644 --- a/libio/stdio.h +++ b/libio/stdio.h @@ -1,5 +1,6 @@ /* Define ISO C stdio on top of C++ iostreams. - Copyright (C) 1991, 1994-1999, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1991, 1994-1999, 2000, 2001, 2002 + Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -595,10 +596,12 @@ struct obstack; /* See <obstack.h>. */ /* Write formatted output to an obstack. */ extern int obstack_printf (struct obstack *__restrict __obstack, - __const char *__restrict __format, ...) __THROW; + __const char *__restrict __format, ...) + __THROW __attribute__ ((__format__ (__printf__, 2, 3))); extern int obstack_vprintf (struct obstack *__restrict __obstack, __const char *__restrict __format, - _G_va_list __args) __THROW; + _G_va_list __args) + __THROW __attribute__ ((__format__ (__printf__, 2, 0))); #endif /* Use GNU. */ diff --git a/libio/tst-freopen.c b/libio/tst-freopen.c new file mode 100644 index 0000000000..20e5f2f874 --- /dev/null +++ b/libio/tst-freopen.c @@ -0,0 +1,103 @@ +/* Test freopen with mmap stdio. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +int main (void) +{ + char name[] = "/tmp/tst-freopen.XXXXXX"; + char buf[4096]; + const char * const test = "Let's test freopen.\n"; + char temp[strlen (test) + 1]; + int fd = mkstemp (name); + FILE *f; + + if (fd == -1) + { + printf ("%Zd: cannot open temporary file: %m\n", __LINE__); + exit (1); + } + + f = fdopen (fd, "w"); + if (f == NULL) + { + printf ("%Zd: cannot fdopen temporary file: %m\n", __LINE__); + exit (1); + } + + fputs (test, f); + fclose (f); + + f = fopen (name, "r"); + if (f == NULL) + { + printf ("%Zd: cannot fopen temporary file: %m\n", __LINE__); + exit (1); + } + + if (fread (temp, 1, strlen (test), f) != strlen (test)) + { + printf ("%Zd: couldn't read the file back: %m\n", __LINE__); + exit (1); + } + temp [strlen (test)] = '\0'; + + if (strcmp (test, temp)) + { + printf ("%Zd: read different string than was written:\n%s%s", + __LINE__, test, temp); + exit (1); + } + + f = freopen (name, "r+", f); + if (f == NULL) + { + printf ("%Zd: cannot freopen temporary file: %m\n", __LINE__); + exit (1); + } + + if (fseek (f, 0, SEEK_SET) != 0) + { + printf ("%Zd: couldn't fseek to start: %m\n", __LINE__); + exit (1); + } + + if (fread (temp, 1, strlen (test), f) != strlen 
(test)) + { + printf ("%Zd: couldn't read the file back: %m\n", __LINE__); + exit (1); + } + temp [strlen (test)] = '\0'; + + if (strcmp (test, temp)) + { + printf ("%Zd: read different string than was written:\n%s%s", + __LINE__, test, temp); + exit (1); + } + + fclose (f); + + unlink (name); + exit (0); +} diff --git a/linuxthreads_db/ChangeLog b/linuxthreads_db/ChangeLog index 33a38e5f32..c5cbfba126 100644 --- a/linuxthreads_db/ChangeLog +++ b/linuxthreads_db/ChangeLog @@ -1,3 +1,7 @@ +2002-07-10 Ulrich Drepper + + * Versions [libthread_db] (GLIBC_2.3): Add td_thr_tls_get_addr. + 2002-06-14 H.J. Lu * td_thr_tls_get_addr.c (td_thr_tls_get_addr): Don't include diff --git a/linuxthreads_db/Versions b/linuxthreads_db/Versions index 57619a3732..4ca8042c12 100644 --- a/linuxthreads_db/Versions +++ b/linuxthreads_db/Versions @@ -15,4 +15,7 @@ libthread_db { GLIBC_2.2.3 { td_symbol_list; } + GLIBC_2.3 { + td_thr_tls_get_addr; + } } diff --git a/posix/Makefile b/posix/Makefile index 008154de9a..db58cf5c7c 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -71,7 +71,7 @@ tests := tstgetopt testfnm runtests runptests \ tst-getlogin tst-mmap tst-getaddrinfo tst-truncate \ tst-truncate64 tst-fork tst-fnmatch tst-regexloc tst-dir \ tst-chmod bug-regex1 bug-regex2 bug-regex3 bug-regex4 \ - tst-gnuglob tst-regex bug-regex5 + tst-gnuglob tst-regex bug-regex5 bug-regex6 ifeq (yes,$(build-shared)) test-srcs := globtest tests += wordexp-test tst-exec tst-spawn @@ -125,6 +125,7 @@ tst-regexloc-ENV = LOCPATH=$(common-objpfx)localedata bug-regex1-ENV = LOCPATH=$(common-objpfx)localedata tst-regex-ENV = LOCPATH=$(common-objpfx)localedata bug-regex5-ENV = LOCPATH=$(common-objpfx)localedata +bug-regex6-ENV = LOCPATH=$(common-objpfx)localedata testcases.h: TESTS TESTS2C.sed sed -f TESTS2C.sed < $< > $@T diff --git a/posix/Versions b/posix/Versions index c79a042aa3..07dc49a4d4 100644 --- a/posix/Versions +++ b/posix/Versions @@ -105,6 +105,10 @@ libc { # Extended Interface. 
fnmatch; } + GLIBC_2.2.6 { + # For syscall wrapper + __nanosleep; + } GLIBC_PRIVATE { # functions which have an additional interface since they are # are cancelable. diff --git a/posix/bug-regex4.c b/posix/bug-regex4.c index 76841e9db3..5ecca1a8d0 100644 --- a/posix/bug-regex4.c +++ b/posix/bug-regex4.c @@ -35,7 +35,6 @@ main (void) setlocale (LC_ALL, "C"); - setlocale (LC_ALL, "C"); s = re_compile_pattern ("ab[cde]", 7, &regex); if (s != NULL) { diff --git a/posix/bug-regex6.c b/posix/bug-regex6.c new file mode 100644 index 0000000000..9a06898a26 --- /dev/null +++ b/posix/bug-regex6.c @@ -0,0 +1,74 @@ +/* Test for regexec. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include +#include +#include +#include + +int +main (int argc, char *argv[]) +{ + regex_t re; + regmatch_t mat[10]; + int i, j, ret = 0; + char *locales[] = { "C", "de_DE.UTF-8" }; + char *string = "http://www.regex.com/pattern/matching.html#intro"; + regmatch_t expect[10] = { + { 0, 48 }, { 0, 5 }, { 0, 4 }, { 5, 20 }, { 7, 20 }, { 20, 42 }, + { -1, -1 }, { -1, -1 }, { 42, 48 }, { 43, 48 } }; + + for (i = 0; i < sizeof (locales) / sizeof (locales[0]); ++i) + { + if (setlocale (LC_ALL, locales[i]) == NULL) + { + puts ("cannot set locale"); + ret = 1; + } + else if (regcomp (&re, + "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?", + REG_EXTENDED) != REG_NOERROR) + { + puts ("cannot compile expression \"[a-f]*\""); + ret = 1; + } + else if (regexec (&re, string, 10, mat, 0) == REG_NOMATCH) + { + puts ("no match"); + ret = 1; + } + else + { + if (! memcmp (mat, expect, sizeof (mat))) + printf ("matching ok for %s locale\n", locales[i]); + else + { + printf ("matching failed for %s locale:\n", locales[i]); + ret = 1; + for (j = 0; j < 9; ++j) + if (mat[j].rm_so != -1) + printf ("%d: %.*s\n", j, mat[j].rm_eo - mat[j].rm_so, + string + mat[j].rm_so); + } + } + } + + return ret; +} diff --git a/posix/regcomp.c b/posix/regcomp.c index b9b056065f..513604274c 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -724,7 +724,6 @@ re_compile_internal (preg, pattern, length, syntax) dfa = re_realloc (preg->buffer, re_dfa_t, 1); if (dfa == NULL) return REG_ESPACE; - memset (dfa, '\0', sizeof (re_dfa_t)); preg->allocated = sizeof (re_dfa_t); } preg->buffer = (unsigned char *) dfa; @@ -781,6 +780,9 @@ init_dfa (dfa, pat_len) int pat_len; { int table_size; + + memset (dfa, '\0', sizeof (re_dfa_t)); + dfa->nodes_alloc = pat_len + 1; dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); @@ -1001,8 +1003,6 @@ calc_first (dfa, node) switch (type) { #ifdef DEBUG - case OP_OPEN_SUBEXP: - case OP_CLOSE_SUBEXP: case OP_OPEN_BRACKET: case OP_CLOSE_BRACKET: case 
OP_OPEN_DUP_NUM: @@ -1028,6 +1028,8 @@ calc_first (dfa, node) case SIMPLE_BRACKET: case OP_BACK_REF: case ANCHOR: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: node->first = idx; break; case OP_DUP_PLUS: @@ -1041,14 +1043,6 @@ calc_first (dfa, node) case OP_ALT: node->first = idx; break; - case SUBEXP: - if (node->left == NULL) - { - if (node->next == -1) - calc_next (dfa, node); - node->first = node->next; - break; - } /* else fall through */ default: #ifdef DEBUG @@ -1161,7 +1155,9 @@ calc_epsdest (dfa, node) } re_node_set_init_2 (dfa->edests + idx, left, right); } - else if (dfa->nodes[idx].type == ANCHOR) + else if (dfa->nodes[idx].type == ANCHOR + || dfa->nodes[idx].type == OP_OPEN_SUBEXP + || dfa->nodes[idx].type == OP_CLOSE_SUBEXP) re_node_set_init_1 (dfa->edests + idx, node->next); } } @@ -2055,8 +2051,9 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err) reg_errcode_t *err; { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree; + bin_tree_t *tree, *left_par, *right_par; size_t cur_nsub; + int new_idx; cur_nsub = preg->re_nsub++; if (dfa->subexps_alloc < preg->re_nsub) { @@ -2073,30 +2070,39 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err) } dfa->subexps[cur_nsub].start = dfa->nodes_len; dfa->subexps[cur_nsub].end = -1; + + new_idx = re_dfa_add_node (dfa, *token, 0); + left_par = create_tree (NULL, NULL, 0, new_idx); + if (BE (new_idx == -1 || left_par == NULL, 0)) + return *err = REG_ESPACE, NULL; + dfa->nodes[new_idx].opr.idx = cur_nsub; *token = fetch_token (regexp, syntax); /* The subexpression may be a null string. 
*/ if (token->type == OP_CLOSE_SUBEXP) - { - tree = create_tree (NULL, NULL, SUBEXP, 0); - if (BE (tree == NULL, 0)) - return *err = REG_ESPACE, NULL; - dfa->subexps[cur_nsub].end = dfa->nodes_len; - } + tree = NULL; else { tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; - dfa->subexps[cur_nsub].end = dfa->nodes_len; - if (BE (token->type != OP_CLOSE_SUBEXP, 0)) - { - free_bin_tree (tree); - *err = REG_BADPAT; - return NULL; - } - tree = create_tree (tree, NULL, SUBEXP, 0); } + if (BE (token->type != OP_CLOSE_SUBEXP, 0)) + { + free_bin_tree (tree); + *err = REG_BADPAT; + return NULL; + } + new_idx = re_dfa_add_node (dfa, *token, 0); + dfa->subexps[cur_nsub].end = dfa->nodes_len; + right_par = create_tree (NULL, NULL, 0, new_idx); + tree = ((tree == NULL) ? right_par + : create_tree (tree, right_par, CONCAT, 0)); + tree = create_tree (left_par, tree, CONCAT, 0); + if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0)) + return *err = REG_ESPACE, NULL; + dfa->nodes[new_idx].opr.idx = cur_nsub; + return tree; } diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 95ae46e297..20622540e8 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -96,8 +96,6 @@ typedef enum NON_TYPE = 0, /* Token type, these are used only by token. */ - OP_OPEN_SUBEXP, - OP_CLOSE_SUBEXP, OP_OPEN_BRACKET, OP_CLOSE_BRACKET, OP_CHARSET_RANGE, @@ -124,6 +122,8 @@ typedef enum #endif /* RE_ENABLE_I18N */ /* Node type, These are used by token, node, tree. */ + OP_OPEN_SUBEXP, + OP_CLOSE_SUBEXP, OP_PERIOD, CHARACTER, END_OF_RE, @@ -142,24 +142,18 @@ typedef enum #ifdef RE_ENABLE_I18N typedef struct { - /* If this character set is the non-matching list. */ - unsigned int non_match : 1; - /* Multibyte characters. */ wchar_t *mbchars; - int nmbchars; /* Collating symbols. */ # ifdef _LIBC int32_t *coll_syms; # endif - int ncoll_syms; /* Equivalence classes. 
*/ # ifdef _LIBC int32_t *equiv_classes; # endif - int nequiv_classes; /* Range expressions. */ # ifdef _LIBC @@ -169,17 +163,32 @@ typedef struct wchar_t *range_starts; wchar_t *range_ends; # endif /* not _LIBC */ - int nranges; /* Character classes. */ wctype_t *char_classes; + + /* If this character set is the non-matching list. */ + unsigned int non_match : 1; + + /* # of multibyte characters. */ + int nmbchars; + + /* # of collating symbols. */ + int ncoll_syms; + + /* # of equivalence classes. */ + int nequiv_classes; + + /* # of range expressions. */ + int nranges; + + /* # of character classes. */ int nchar_classes; } re_charset_t; #endif /* RE_ENABLE_I18N */ typedef struct { - re_token_type_t type; union { unsigned char c; /* for CHARACTER */ @@ -195,6 +204,11 @@ typedef struct re_node_set *bkref_eclosure; } *ctx_info; } opr; +#if __GNUC__ >= 2 + re_token_type_t type : 8; +#else + re_token_type_t type; +#endif unsigned int constraint : 10; /* context constraint */ unsigned int duplicated : 1; #ifdef RE_ENABLE_I18N @@ -203,8 +217,9 @@ typedef struct } re_token_t; #define IS_EPSILON_NODE(type) \ - ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS || \ - (type) == OP_DUP_QUESTION || (type) == ANCHOR) + ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \ + || (type) == OP_DUP_QUESTION || (type) == ANCHOR \ + || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP) #define ACCEPT_MB_NODE(type) \ ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD) @@ -214,9 +229,6 @@ struct re_string_t /* Indicate the raw buffer which is the original string passed as an argument of regexec(), re_search(), etc.. */ const unsigned char *raw_mbs; - /* Index in RAW_MBS. Each character mbs[i] corresponds to - raw_mbs[raw_mbs_idx + i]. */ - int raw_mbs_idx; /* Store the multibyte string. In case of "case insensitive mode" like REG_ICASE, upper cases of the string are stored, otherwise MBS points the same address that RAW_MBS points. 
*/ @@ -230,6 +242,9 @@ struct re_string_t wint_t *wcs; mbstate_t cur_state; #endif + /* Index in RAW_MBS. Each character mbs[i] corresponds to + raw_mbs[raw_mbs_idx + i]. */ + int raw_mbs_idx; /* The length of the valid characters in the buffers. */ int valid_len; /* The length of the buffers MBS, MBS_CASE, and WCS. */ diff --git a/posix/regexec.c b/posix/regexec.c index 2c7a2774eb..5dd3a06827 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -58,6 +58,8 @@ static int check_halt_node_context (const re_dfa_t *dfa, int node, static int check_halt_state_context (const regex_t *preg, const re_dfastate_t *state, const re_match_context_t *mctx, int idx); +static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node, + int cur_idx, int nmatch); static int proceed_next_node (const regex_t *preg, const re_match_context_t *mctx, int *pidx, int node, re_node_set *eps_via_nodes); @@ -886,24 +888,38 @@ proceed_next_node (preg, mctx, pidx, node, eps_via_nodes) re_node_set *eps_via_nodes; { re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - int i, dest_node = -1, err; + int i, err, dest_node, cur_entity; + dest_node = -1; + cur_entity = ((dfa->nodes[node].type == OP_CONTEXT_NODE) + ? dfa->nodes[node].opr.ctx_info->entity : node); if (IS_EPSILON_NODE (dfa->nodes[node].type)) { + int dest_entity = INT_MAX; err = re_node_set_insert (eps_via_nodes, node); if (BE (err < 0, 0)) return -1; for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i) { - int candidate = mctx->state_log[*pidx]->nodes.elems[i]; - if (!re_node_set_contains (dfa->edests + node, candidate) - && !(dfa->nodes[candidate].type == OP_CONTEXT_NODE - && re_node_set_contains (dfa->edests + node, - dfa->nodes[candidate].opr.ctx_info->entity))) - continue; - dest_node = candidate; + int candidate, candidate_entity; + candidate = mctx->state_log[*pidx]->nodes.elems[i]; + candidate_entity = ((dfa->nodes[candidate].type == OP_CONTEXT_NODE) + ? 
dfa->nodes[candidate].opr.ctx_info->entity + : candidate); + if (!re_node_set_contains (dfa->edests + node, candidate)) + if (candidate == candidate_entity + || !re_node_set_contains (dfa->edests + node, candidate_entity)) + continue; + /* In order to avoid infinite loop like "(a*)*". */ - if (!re_node_set_contains (eps_via_nodes, dest_node)) - break; + if (cur_entity > candidate_entity + && re_node_set_contains (eps_via_nodes, candidate)) + continue; + + if (dest_entity > candidate_entity) + { + dest_node = candidate; + dest_entity = candidate_entity; + } } #ifdef DEBUG assert (dest_node != -1); @@ -986,9 +1002,8 @@ set_regs (preg, mctx, nmatch, pmatch, last_node) int last_node; { re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - int idx, cur_node, node_entity, real_nmatch; + int idx, cur_node, real_nmatch; re_node_set eps_via_nodes; - int i; #ifdef DEBUG assert (nmatch > 1); assert (mctx->state_log != NULL); @@ -998,36 +1013,7 @@ set_regs (preg, mctx, nmatch, pmatch, last_node) re_node_set_init_empty (&eps_via_nodes); for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) { - node_entity = ((dfa->nodes[cur_node].type == OP_CONTEXT_NODE) - ? dfa->nodes[cur_node].opr.ctx_info->entity : cur_node); - for (i = 1; i < real_nmatch; ++i) - { - if (dfa->subexps[i - 1].start == dfa->subexps[i - 1].end) - { - /* In case of the null subexpression like '()'. */ - if (dfa->subexps[i - 1].start == node_entity) - { - pmatch[i].rm_so = idx; - pmatch[i].rm_eo = idx; - } - } - else if (dfa->subexps[i - 1].start <= node_entity - && node_entity < dfa->subexps[i - 1].end) - { - if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo != -1) - /* We are at the first node of this sub expression. */ - { - pmatch[i].rm_so = idx; - pmatch[i].rm_eo = -1; - } - } - else - { - if (pmatch[i].rm_so != -1 && pmatch[i].rm_eo == -1) - /* We are at the last node of this sub expression. 
*/ - pmatch[i].rm_eo = idx; - } - } + update_regs (dfa, pmatch, cur_node, idx, real_nmatch); if (idx == pmatch[0].rm_eo && cur_node == last_node) break; @@ -1040,6 +1026,30 @@ set_regs (preg, mctx, nmatch, pmatch, last_node) return REG_NOERROR; } +static void +update_regs (dfa, pmatch, cur_node, cur_idx, nmatch) + re_dfa_t *dfa; + regmatch_t *pmatch; + int cur_node, cur_idx, nmatch; +{ + int type = dfa->nodes[cur_node].type; + int reg_num; + if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP) + return; + reg_num = dfa->nodes[cur_node].opr.idx + 1; + if (reg_num >= nmatch) + return; + if (type == OP_OPEN_SUBEXP) + { + /* We are at the first node of this sub expression. */ + pmatch[reg_num].rm_so = cur_idx; + pmatch[reg_num].rm_eo = -1; + } + else if (type == OP_CLOSE_SUBEXP) + /* We are at the last node of this sub expression. */ + pmatch[reg_num].rm_eo = cur_idx; + } + #define NUMBER_OF_STATE 1 /* This function checks the STATE_LOG from the MCTX->match_last to 0 -- cgit v1.2.3