aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@gmail.com> 2012-01-03 07:54:15 -0500
committer Ulrich Drepper <drepper@gmail.com> 2012-01-03 07:54:15 -0500
commit 9f1151705e3d861a4d6d680e74158cd708119053 (patch)
tree 8859073c745b1a6a09492a487be6e02ddfa4b9c2
parent a316c1f6823590fc6209e470c77d4cb44f4e2f50 (diff)
download glibc-9f1151705e3d861a4d6d680e74158cd708119053.tar
glibc-9f1151705e3d861a4d6d680e74158cd708119053.tar.gz
glibc-9f1151705e3d861a4d6d680e74158cd708119053.tar.bz2
glibc-9f1151705e3d861a4d6d680e74158cd708119053.zip
Optimize regex a bit
-rw-r--r-- ChangeLog 4
-rw-r--r-- posix/regcomp.c 36
2 files changed, 36 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 737c4ba196..73b1d419bf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2012-01-03 Ulrich Drepper <drepper@gmail.com>
+
+ * posix/regcomp.c (init_word_char): Optimize a bit for sane encodings.
+
2012-01-01 Ulrich Drepper <drepper@gmail.com>
* posix/getconf.c: Update copyright year.
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 34ee845081..6771dbb299 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002-2007,2009,2010,2011 Free Software Foundation, Inc.
+ Copyright (C) 2002-2007,2009,2010,2011,2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -926,10 +926,38 @@ static void
internal_function
init_word_char (re_dfa_t *dfa)
{
- int i, j, ch;
dfa->word_ops_used = 1;
- for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
- for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+ int i = 0;
+ int ch = 0;
+ if (BE (dfa->map_notascii == 0, 1))
+ {
+ if (sizeof (dfa->word_char[0]) == 8)
+ {
+ dfa->word_char[0] = UINT64_C (0x03ff000000000000);
+ dfa->word_char[1] = UINT64_C (0x07fffffe87fffffe);
+ i = 2;
+ }
+ else if (sizeof (dfa->word_char[0]) == 4)
+ {
+ dfa->word_char[0] = UINT32_C (0x00000000);
+ dfa->word_char[1] = UINT32_C (0x03ff0000);
+ dfa->word_char[2] = UINT32_C (0x87fffffe);
+ dfa->word_char[3] = UINT32_C (0x07fffffe);
+ i = 4;
+ }
+ else
+ abort ();
+ ch = 128;
+
+ if (BE (dfa->is_utf8, 1))
+ {
+ memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
+ return;
+ }
+ }
+
+ for (; i < BITSET_WORDS; ++i)
+ for (int j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_')
dfa->word_char[i] |= (bitset_word_t) 1 << j;
}