x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]

This code:

L(between_2_3):
	/* Load as big endian with overlapping loads and bswap to avoid
	   branches.  */
	movzwl	-2(%rdi, %rdx), %eax
	movzwl	-2(%rsi, %rdx), %ecx
	shll	$16, %eax
	shll	$16, %ecx
	movzwl	(%rdi), %edi
	movzwl	(%rsi), %esi
	orl	%edi, %eax
	orl	%esi, %ecx
	bswap	%eax
	bswap	%ecx
	subl	%ecx, %eax
	ret

needs a saturating subtract because the full register is used.  With this commit, only the lower 24 bits of the register are used, so a regular subtraction suffices.

The test case change adds coverage for these kinds of bugs.
author: Florian Weimer <fweimer@redhat.com> 2017-06-23 17:23:44 +0200
committer: Florian Weimer <fweimer@redhat.com> 2017-06-23 17:24:40 +0200
commit: 3ec7c02cc3e922b9364dc8cfd1d4546671b91003 (patch)
tree: 283d54448fe89359272093156316884e61992c9a
parent: 7fa1d9462baabc5a1058efc13a48444af4678acf (diff)
download: glibc-3ec7c02cc3e922b9364dc8cfd1d4546671b91003.tar
glibc-3ec7c02cc3e922b9364dc8cfd1d4546671b91003.tar.gz
glibc-3ec7c02cc3e922b9364dc8cfd1d4546671b91003.tar.bz2
glibc-3ec7c02cc3e922b9364dc8cfd1d4546671b91003.zip
3 files changed, 23 insertions, 15 deletions
diff --git a/ChangeLog b/ChangeLog
index 4f1ef82c51..12f1e3bce6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2017-06-23  Florian Weimer  <fweimer@redhat.com>
+
+	[BZ #21662]
+	* sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S (between_2_3):
+	Use only 24 bits of the register before the subtraction.
+	* string/test-memcmp.c (check1): Check with different lengths.
+
 2017-06-23  Gabriel F. T. Gomes  <gftg@linux.vnet.ibm.com>
 
 	* sysdeps/ieee754/float128/Makefile (CFLAGS-strfromf128.c): Add
diff --git a/string/test-memcmp.c b/string/test-memcmp.c
index a7969edaea..1538930534 100644
--- a/string/test-memcmp.c
+++ b/string/test-memcmp.c
@@ -441,11 +441,12 @@ check1 (void)
 
   n = 116;
   for (size_t i = 0; i < n; i++)
-    {
-      exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, n - i);
-      FOR_EACH_IMPL (impl, 0)
-	check_result (impl, s1 + i, s2 + i, n - i, exp_result);
-    }
+    for (size_t len = 0; len <= n - i; ++len)
+      {
+	exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, len);
+	FOR_EACH_IMPL (impl, 0)
+	  check_result (impl, s1 + i, s2 + i, len, exp_result);
+      }
 }
 
 /* This test checks that memcmp doesn't overrun buffers.  */
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
index 47630dd97b..9d1921033e 100644
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -137,18 +137,18 @@ L(exit):
 
 	.p2align 4
 L(between_2_3):
-	/* Load as big endian with overlapping loads and bswap to avoid
-	   branches.  */
-	movzwl	-2(%rdi, %rdx), %eax
-	movzwl	-2(%rsi, %rdx), %ecx
-	shll	$16, %eax
-	shll	$16, %ecx
-	movzwl	(%rdi), %edi
-	movzwl	(%rsi), %esi
-	orl	%edi, %eax
-	orl	%esi, %ecx
+	/* Load as big endian to avoid branches.  */
+	movzwl	(%rdi), %eax
+	movzwl	(%rsi), %ecx
+	shll	$8, %eax
+	shll	$8, %ecx
 	bswap	%eax
 	bswap	%ecx
+	movzbl	-1(%rdi, %rdx), %edi
+	movzbl	-1(%rsi, %rdx), %esi
+	orl	%edi, %eax
+	orl	%esi, %ecx
+	/* Subtraction is okay because the upper 8 bits are zero.  */
 	subl	%ecx, %eax
 	ret
author	Florian Weimer <fweimer@redhat.com>	2017-06-23 17:23:44 +0200
committer	Florian Weimer <fweimer@redhat.com>	2017-06-23 17:24:40 +0200
commit	3ec7c02cc3e922b9364dc8cfd1d4546671b91003 (patch)
tree	283d54448fe89359272093156316884e61992c9a
parent	7fa1d9462baabc5a1058efc13a48444af4678acf (diff)
download	glibc-3ec7c02cc3e922b9364dc8cfd1d4546671b91003.tar glibc-3ec7c02cc3e922b9364dc8cfd1d4546671b91003.tar.gz glibc-3ec7c02cc3e922b9364dc8cfd1d4546671b91003.tar.bz2 glibc-3ec7c02cc3e922b9364dc8cfd1d4546671b91003.zip