aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLiubov Dmitrieva <liubov.dmitrieva@gmail.com>2011-12-23 08:50:39 -0500
committerUlrich Drepper <drepper@gmail.com>2011-12-23 08:50:39 -0500
commitc044cf14b0238b6e866f4ef5f8907d6680230212 (patch)
tree174941df1d04f8fd18b0cc93401b50a4a97d7a88
parentd455f537be05dac05b8fc67e58f13fd85d553c74 (diff)
downloadglibc-c044cf14b0238b6e866f4ef5f8907d6680230212.tar
glibc-c044cf14b0238b6e866f4ef5f8907d6680230212.tar.gz
glibc-c044cf14b0238b6e866f4ef5f8907d6680230212.tar.bz2
glibc-c044cf14b0238b6e866f4ef5f8907d6680230212.zip
Fix wrong copying processing for last bytes in x86-32 wcscpy
Wrong copy algorithm for last bytes, not thread safety. In some particular cases it uses the destination memory beyond the string end for 16-byte load, puts changes into that part that is relevant to destination string and writes whole 16-byte chunk into memory. I have a test case where the memory beyond the string end contains malloc/free data, that appear corrupted in case free() updates it in between the 16-byte read and 16-byte write.
-rw-r--r--ChangeLog5
-rw-r--r--sysdeps/i386/i686/multiarch/wcscpy-ssse3.S62
2 files changed, 26 insertions, 41 deletions
diff --git a/ChangeLog b/ChangeLog
index 2866f71a72..7d77002db1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2011-12-23 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
+
+ * sysdeps/i386/i686/multiarch/wcscpy-ssse3.S: Fix wrong copying
+ processing for last bytes.
+
2011-08-06 Bruno Haible <bruno@clisp.org>
[BZ #13061]
diff --git a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
index 84d92a8bde..abeea22266 100644
--- a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
@@ -54,7 +54,6 @@ ENTRY (__wcscpy_ssse3)
PUSH (%edi)
mov %edx, %edi
-
PUSH (%esi)
lea 16(%ecx), %esi
@@ -220,7 +219,6 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -228,15 +226,14 @@ L(Shl4Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
@@ -248,7 +245,6 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -256,13 +252,11 @@ L(Shl4Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
@@ -305,14 +299,13 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
- movaps (%edx), %xmm6
- psrldq $12, %xmm6
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx)
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
+ POP (%esi)
add $12, %edx
add $12, %ecx
-
- POP (%esi)
test %al, %al
jz L(ExitHigh)
test $0x01, %al
@@ -337,7 +330,6 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -345,15 +337,14 @@ L(Shl8Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
@@ -365,7 +356,6 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -373,13 +363,11 @@ L(Shl8Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
@@ -422,14 +410,11 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
- movaps (%edx), %xmm6
- psrldq $8, %xmm6
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ POP (%esi)
add $8, %edx
add $8, %ecx
-
- POP (%esi)
test %al, %al
jz L(ExitHigh)
test $0x01, %al
@@ -454,7 +439,6 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -462,15 +446,14 @@ L(Shl12Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
+ movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
- movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
@@ -482,7 +465,6 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -490,13 +472,11 @@ L(Shl12Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
@@ -539,11 +519,9 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
- movaps (%edx), %xmm6
- psrldq $4, %xmm6
+ movl (%ecx), %esi
+ movl %esi, (%edx)
mov $4, %esi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx)
.p2align 4
L(CopyFrom1To16Bytes):
@@ -555,6 +533,7 @@ L(CopyFrom1To16Bytes):
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
+L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
@@ -564,6 +543,7 @@ L(CopyFrom1To16Bytes):
L(ExitHigh):
test $0x01, %ah
jnz L(Exit12)
+L(Exit16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edi, %eax