From d58ab810a6e325cc351684d174c48cabce01bcc1 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Wed, 14 Dec 2016 15:12:18 +0000 Subject: Improve strtok and strtok_r performance. Instead of calling strpbrk which calls strcspn, call strcspn directly so we get the end of the token without an extra call to rawmemchr. Also avoid an unnecessary call to strcspn after the last token by adding an early exit for an empty string. Change strtok to tailcall strtok_r to avoid unnecessary code duplication. Remove the special header optimization for strtok_r of a 1-character constant string - both strspn and strcspn contain optimizations for this case. Benchmarking this showed similar performance in the worst case, but up to 5.5x better performance in the "found" case for large inputs. * benchtests/bench-strtok.c (oldstrtok): Add old implementation. * string/strtok.c (strtok): Change to tailcall __strtok_r. * string/strtok_r.c (__strtok_r): Optimize for performance. * string/string-inlines.c (__old_strtok_r_1c): New function. * string/bits/string2.h (__strtok_r): Move to string-inlines.c. --- string/string-inlines.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'string/string-inlines.c') diff --git a/string/string-inlines.c b/string/string-inlines.c index 1091468519..d43e5897c3 100644 --- a/string/string-inlines.c +++ b/string/string-inlines.c @@ -35,6 +35,36 @@ #include "shlib-compat.h" +#if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_25) +/* The inline functions are not used from GLIBC 2.25 and forward, however + they are required to provide the symbols through string-inlines.c + (if inlining is not possible for compatibility reasons). 
*/ + /* Compatibility implementation of strtok_r for a single separator character. Scanning starts at __s, or at *__nextp when __s is NULL (in that case *__nextp is dereferenced without a NULL check). Leading occurrences of __sep are skipped; if only the terminator remains the result is NULL. Otherwise the result is the start of the token, and the first __sep after it, if any, is overwritten with a NUL byte. *__nextp receives the position where scanning stopped. NOTE(review): the leading skip loop has no terminator test, so a NUL __sep would make it read past the end of the string; presumably callers never pass one, confirm against the old inline in string2.h. */ +char * +__old_strtok_r_1c (char *__s, char __sep, char **__nextp) +{ + char *__result; + if (__s == NULL) + __s = *__nextp; /* continue from the saved position */ + while (*__s == __sep) + ++__s; /* skip leading separators */ + __result = NULL; /* stays NULL when no token is left */ + if (*__s != '\0') + { + __result = __s++; /* token begins here; scan on from the next character */ + while (*__s != '\0') + if (*__s++ == __sep) + { + __s[-1] = '\0'; /* __s was already advanced, so index -1 is the separator just matched */ + break; + } + } + *__nextp = __s; /* either just past the overwritten separator or at the terminating NUL */ + return __result; +} +compat_symbol (libc, __old_strtok_r_1c, __strtok_r_1c, GLIBC_2_1_1); +#endif + #if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24) /* The inline functions are not used from GLIBC 2.24 and forward, however they are required to provide the symbols through string-inlines.c -- cgit v1.2.3-70-g09d2