aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoah Goldstein <goldstein.w.n@gmail.com>2022-06-06 21:11:33 -0700
committerSunil K Pandey <skpgkp2@gmail.com>2022-07-18 22:13:57 -0700
commit992086b659a897e9019e97d82d3589adba317e6e (patch)
treec444e5c8c148537df79f7741e5368d715da7cc62
parent58ca9728a8005aed7dfdd9a4890a41605bfe040f (diff)
downloadglibc-992086b659a897e9019e97d82d3589adba317e6e.tar
glibc-992086b659a897e9019e97d82d3589adba317e6e.tar.gz
glibc-992086b659a897e9019e97d82d3589adba317e6e.tar.bz2
glibc-992086b659a897e9019e97d82d3589adba317e6e.zip
x86: Shrink code size of memchr-avx2.S
This is not meant as a performance optimization. The previous code was far too liberal in aligning targets and wasted code size unnecessarily. The total code size saving is: 59 bytes. There are no major changes in the benchmarks. Geometric Mean of all benchmarks New / Old: 0.967. Full xcheck passes on x86_64. Reviewed-by: H.J. Lu <hjl.tools@gmail.com> (cherry picked from commit 6dcbb7d95dded20153b12d76d2f4e0ef0cda4f35) x86: Fix page cross case in rawmemchr-avx2 [BZ #29234] commit 6dcbb7d95dded20153b12d76d2f4e0ef0cda4f35 Author: Noah Goldstein <goldstein.w.n@gmail.com> Date: Mon Jun 6 21:11:33 2022 -0700 x86: Shrink code size of memchr-avx2.S changed how the page cross case aligned the string (rdi) in rawmemchr. This was incompatible with how `L(cross_page_continue)` expected the pointer to be aligned and would cause rawmemchr to read data that started before the beginning of the string. What it would read was in valid memory but could count CHAR matches, resulting in an incorrect return value. This commit fixes that issue by essentially reverting the changes to the L(page_cross) case as they didn't really matter. Test cases were added and all pass with the new code (and were confirmed to fail with the old code). Reviewed-by: H.J. Lu <hjl.tools@gmail.com> (cherry picked from commit 2c9af8421d2b4a7fcce163e7bc81a118d22fd346)
-rw-r--r--string/test-rawmemchr.c57
-rw-r--r--sysdeps/x86_64/multiarch/memchr-avx2-rtm.S1
-rw-r--r--sysdeps/x86_64/multiarch/memchr-avx2.S109
3 files changed, 116 insertions, 51 deletions
diff --git a/string/test-rawmemchr.c b/string/test-rawmemchr.c
index 085098aba8..327c0654e6 100644
--- a/string/test-rawmemchr.c
+++ b/string/test-rawmemchr.c
@@ -18,6 +18,7 @@
<https://www.gnu.org/licenses/>. */
#include <assert.h>
+#include <support/xunistd.h>
#define TEST_MAIN
#define TEST_NAME "rawmemchr"
@@ -52,12 +53,44 @@ do_one_test (impl_t *impl, const char *s, int c, char *exp_res)
}
static void
+do_test_bz29234 (void)
+{
+ size_t i, j;
+ char *ptr_start;
+ char *buf = xmmap (0, 8192, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1);
+
+ memset (buf, -1, 8192);
+
+ ptr_start = buf + 4096 - 8;
+
+ /* Out of range matches before the start of a page. */
+ memset (ptr_start - 8, 0x1, 8);
+
+ for (j = 0; j < 8; ++j)
+ {
+ for (i = 0; i < 128; ++i)
+ {
+ ptr_start[i + j] = 0x1;
+
+ FOR_EACH_IMPL (impl, 0)
+ do_one_test (impl, (char *) (ptr_start + j), 0x1,
+ ptr_start + i + j);
+
+ ptr_start[i + j] = 0xff;
+ }
+ }
+
+ xmunmap (buf, 8192);
+}
+
+static void
do_test (size_t align, size_t pos, size_t len, int seek_char)
{
size_t i;
char *result;
- align &= 7;
+ align &= getpagesize () - 1;
if (align + len >= page_size)
return;
@@ -115,6 +148,13 @@ do_random_tests (void)
}
}
+ if (align)
+ {
+ p[align - 1] = seek_char;
+ if (align > 4)
+ p[align - 4] = seek_char;
+ }
+
assert (pos < len);
size_t r = random ();
if ((r & 31) == 0)
@@ -130,6 +170,13 @@ do_random_tests (void)
result, p);
ret = 1;
}
+
+ if (align)
+ {
+ p[align - 1] = seek_char;
+ if (align > 4)
+ p[align - 4] = seek_char;
+ }
}
}
@@ -151,14 +198,22 @@ test_main (void)
do_test (i, 64, 256, 23);
do_test (0, 16 << i, 2048, 0);
do_test (i, 64, 256, 0);
+
+ do_test (getpagesize () - i, 64, 256, 23);
+ do_test (getpagesize () - i, 64, 256, 0);
}
for (i = 1; i < 32; ++i)
{
do_test (0, i, i + 1, 23);
do_test (0, i, i + 1, 0);
+
+ do_test (getpagesize () - 7, i, i + 1, 23);
+ do_test (getpagesize () - i / 2, i, i + 1, 23);
+ do_test (getpagesize () - i, i, i + 1, 23);
}
do_random_tests ();
+ do_test_bz29234 ();
return ret;
}
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
index 87b076c7c4..c4d71938c5 100644
--- a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
+++ b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
@@ -2,6 +2,7 @@
# define MEMCHR __memchr_avx2_rtm
#endif
+#define COND_VZEROUPPER COND_VZEROUPPER_XTEST
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
index afdb956502..9e0b7dd1f4 100644
--- a/sysdeps/x86_64/multiarch/memchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
@@ -57,7 +57,7 @@
# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
.section SECTION(.text),"ax",@progbits
-ENTRY (MEMCHR)
+ENTRY_P2ALIGN (MEMCHR, 5)
# ifndef USE_AS_RAWMEMCHR
/* Check for zero length. */
# ifdef __ILP32__
@@ -87,12 +87,14 @@ ENTRY (MEMCHR)
# endif
testl %eax, %eax
jz L(aligned_more)
- tzcntl %eax, %eax
+ bsfl %eax, %eax
addq %rdi, %rax
- VZEROUPPER_RETURN
+L(return_vzeroupper):
+ ZERO_UPPER_VEC_REGISTERS_RETURN
+
# ifndef USE_AS_RAWMEMCHR
- .p2align 5
+ .p2align 4
L(first_vec_x0):
/* Check if first match was before length. */
tzcntl %eax, %eax
@@ -100,58 +102,31 @@ L(first_vec_x0):
/* NB: Multiply length by 4 to get byte count. */
sall $2, %edx
# endif
- xorl %ecx, %ecx
+ COND_VZEROUPPER
+ /* Use branch instead of cmovcc so L(first_vec_x0) fits in one fetch
+ block. branch here as opposed to cmovcc is not that costly. Common
+ usage of memchr is to check if the return was NULL (if string was
+ known to contain CHAR user would use rawmemchr). This branch will be
+ highly correlated with the user branch and can be used by most
+ modern branch predictors to predict the user branch. */
cmpl %eax, %edx
- leaq (%rdi, %rax), %rax
- cmovle %rcx, %rax
- VZEROUPPER_RETURN
-
-L(null):
- xorl %eax, %eax
- ret
-# endif
- .p2align 4
-L(cross_page_boundary):
- /* Save pointer before aligning as its original value is
- necessary for computer return address if byte is found or
- adjusting length if it is not and this is memchr. */
- movq %rdi, %rcx
- /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr
- and rdi for rawmemchr. */
- orq $(VEC_SIZE - 1), %ALGN_PTR_REG
- VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
-# ifndef USE_AS_RAWMEMCHR
- /* Calculate length until end of page (length checked for a
- match). */
- leaq 1(%ALGN_PTR_REG), %rsi
- subq %RRAW_PTR_REG, %rsi
-# ifdef USE_AS_WMEMCHR
- /* NB: Divide bytes by 4 to get wchar_t count. */
- shrl $2, %esi
-# endif
-# endif
- /* Remove the leading bytes. */
- sarxl %ERAW_PTR_REG, %eax, %eax
-# ifndef USE_AS_RAWMEMCHR
- /* Check the end of data. */
- cmpq %rsi, %rdx
- jbe L(first_vec_x0)
+ jle L(null)
+ addq %rdi, %rax
+ ret
# endif
- testl %eax, %eax
- jz L(cross_page_continue)
- tzcntl %eax, %eax
- addq %RRAW_PTR_REG, %rax
-L(return_vzeroupper):
- ZERO_UPPER_VEC_REGISTERS_RETURN
- .p2align 4
+ .p2align 4,, 10
L(first_vec_x1):
- tzcntl %eax, %eax
+ bsfl %eax, %eax
incq %rdi
addq %rdi, %rax
VZEROUPPER_RETURN
-
+# ifndef USE_AS_RAWMEMCHR
+ /* Fits in aligning bytes here. */
+L(null):
+ xorl %eax, %eax
+ ret
+# endif
.p2align 4
L(first_vec_x2):
tzcntl %eax, %eax
@@ -340,7 +315,7 @@ L(first_vec_x1_check):
incq %rdi
addq %rdi, %rax
VZEROUPPER_RETURN
- .p2align 4
+ .p2align 4,, 6
L(set_zero_end):
xorl %eax, %eax
VZEROUPPER_RETURN
@@ -428,5 +403,39 @@ L(last_vec_x3):
VZEROUPPER_RETURN
# endif
+ .p2align 4
+L(cross_page_boundary):
+ /* Save pointer before aligning as its original value is necessary for
+ computing the return address if byte is found or adjusting length if it
+ is not and this is memchr. */
+ movq %rdi, %rcx
+ /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr
+ and rdi for rawmemchr. */
+ orq $(VEC_SIZE - 1), %ALGN_PTR_REG
+ VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+# ifndef USE_AS_RAWMEMCHR
+ /* Calculate length until end of page (length checked for a match). */
+ leaq 1(%ALGN_PTR_REG), %rsi
+ subq %RRAW_PTR_REG, %rsi
+# ifdef USE_AS_WMEMCHR
+ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrl $2, %esi
+# endif
+# endif
+ /* Remove the leading bytes. */
+ sarxl %ERAW_PTR_REG, %eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+ /* Check the end of data. */
+ cmpq %rsi, %rdx
+ jbe L(first_vec_x0)
+# endif
+ testl %eax, %eax
+ jz L(cross_page_continue)
+ bsfl %eax, %eax
+ addq %RRAW_PTR_REG, %rax
+ VZEROUPPER_RETURN
+
+
END (MEMCHR)
#endif