aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/i386/i686
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2017-05-19 10:46:29 -0700
committer: H.J. Lu <hjl.tools@gmail.com> 2017-05-19 10:48:45 -0700
commit: 402bf0695218bbe290418b9486b1dd5fe284d903 (patch)
tree: 0107d383f8a38c75076dae69996b15b46e13b04a /sysdeps/i386/i686
parent: 1d71a6315396f6e1cc79a1d7ecca0a559929230a (diff)
downloadglibc-402bf0695218bbe290418b9486b1dd5fe284d903.tar
glibc-402bf0695218bbe290418b9486b1dd5fe284d903.tar.gz
glibc-402bf0695218bbe290418b9486b1dd5fe284d903.tar.bz2
glibc-402bf0695218bbe290418b9486b1dd5fe284d903.zip
x86: Optimize SSE2 memchr overflow calculation
SSE2 memchr computes "edx + ecx - 16" where ecx is less than 16. Use "edx - (16 - ecx)", instead of saturated math, to avoid possible addition overflow. This replaces

    add %ecx, %edx
    sbb %eax, %eax
    or %eax, %edx
    sub $16, %edx

with

    neg %ecx
    add $16, %ecx
    sub %ecx, %edx

It is the same for x86_64, except that it uses rcx/rdx instead of ecx/edx.

* sysdeps/i386/i686/multiarch/memchr-sse2.S (MEMCHR): Compute "edx + ecx - 16" as "edx - (16 - ecx)" to avoid possible addition overflow.
* sysdeps/x86_64/memchr.S (memchr): Likewise.
Diffstat (limited to 'sysdeps/i386/i686')
-rw-r--r-- sysdeps/i386/i686/multiarch/memchr-sse2.S 14
1 files changed, 6 insertions, 8 deletions
diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2.S b/sysdeps/i386/i686/multiarch/memchr-sse2.S
index e41f324a77..172d70de13 100644
--- a/sysdeps/i386/i686/multiarch/memchr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/memchr-sse2.S
@@ -117,14 +117,12 @@ L(crosscache):
# ifndef USE_AS_RAWMEMCHR
jnz L(match_case2_prolog1)
- /* Calculate the last acceptable address and check for possible
- addition overflow by using satured math:
- edx = ecx + edx
- edx |= -(edx < ecx) */
- add %ecx, %edx
- sbb %eax, %eax
- or %eax, %edx
- sub $16, %edx
+ /* "ecx" is less than 16. Calculate "edx + ecx - 16" by using
+ "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to avoid
+ possible addition overflow. */
+ neg %ecx
+ add $16, %ecx
+ sub %ecx, %edx
jbe L(return_null)
lea 16(%edi), %edi
# else