about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog7
-rw-r--r--string/test-memcmp.c11
-rw-r--r--sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S20
3 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 4f1ef82c51..12f1e3bce6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2017-06-23 Florian Weimer <fweimer@redhat.com>
+
+ [BZ #21662]
+ * sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S (between_2_3):
+ Use only 24 bits of the register before the subtraction.
+ * string/test-memcmp.c (check1): Check with different lengths.
+
2017-06-23 Gabriel F. T. Gomes <gftg@linux.vnet.ibm.com>
* sysdeps/ieee754/float128/Makefile (CFLAGS-strfromf128.c): Add
diff --git a/string/test-memcmp.c b/string/test-memcmp.c
index a7969edaea..1538930534 100644
--- a/string/test-memcmp.c
+++ b/string/test-memcmp.c
@@ -441,11 +441,12 @@ check1 (void)
n = 116;
for (size_t i = 0; i < n; i++)
- {
- exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, n - i);
- FOR_EACH_IMPL (impl, 0)
- check_result (impl, s1 + i, s2 + i, n - i, exp_result);
- }
+ for (size_t len = 0; len <= n - i; ++len)
+ {
+ exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, len);
+ FOR_EACH_IMPL (impl, 0)
+ check_result (impl, s1 + i, s2 + i, len, exp_result);
+ }
}
/* This test checks that memcmp doesn't overrun buffers. */
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
index 47630dd97b..9d1921033e 100644
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -137,18 +137,18 @@ L(exit):
.p2align 4
L(between_2_3):
- /* Load as big endian with overlapping loads and bswap to avoid
- branches. */
- movzwl -2(%rdi, %rdx), %eax
- movzwl -2(%rsi, %rdx), %ecx
- shll $16, %eax
- shll $16, %ecx
- movzwl (%rdi), %edi
- movzwl (%rsi), %esi
- orl %edi, %eax
- orl %esi, %ecx
+ /* Load as big endian to avoid branches. */
+ movzwl (%rdi), %eax
+ movzwl (%rsi), %ecx
+ shll $8, %eax
+ shll $8, %ecx
bswap %eax
bswap %ecx
+ movzbl -1(%rdi, %rdx), %edi
+ movzbl -1(%rsi, %rdx), %esi
+ orl %edi, %eax
+ orl %esi, %ecx
+ /* Subtraction is okay because the upper 8 bits are zero. */
subl %ecx, %eax
ret