author	H.J. Lu <hjl.tools@gmail.com>	2016-04-05 05:21:07 -0700
committer	H.J. Lu <hjl.tools@gmail.com>	2016-04-05 05:21:19 -0700
commit	ec0cac9a1f4094bd0db6f77c1b329e7a40eecc10 (patch)
tree	127fd0493c624c8ae57cd8af979bdb88c5f6117e
parent	696ac774847b80cf994438739478b0c3003b5958 (diff)
Force 32-bit displacement in memset-vec-unaligned-erms.S
* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S: Force 32-bit displacement to avoid long nop between instructions.
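For context (not stated in the commit message itself): the ".d32" mnemonic
suffix is GAS syntax that forces the memory operand's displacement to be
encoded in 32 bits even when it is zero or would fit in 8 bits, which makes
the instruction longer. A minimal sketch of the two encodings, with byte
counts assuming the VEX.256 form of vmovdqu (what VMOVU expands to in the
AVX2 build):

	vmovdqu		%ymm0, (%rdi)	/* c5 fe 7f 07: 4 bytes, no disp */
	vmovdqu.d32	%ymm0, (%rdi)	/* c5 fe 7f 87 00 00 00 00: 8 bytes */

Spending the extra bytes inside a useful store lets the assembler reach the
next alignment boundary without inserting a long multi-byte nop between
instructions.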
-rw-r--r--	ChangeLog	5
-rw-r--r--	sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S	13
2 files changed, 18 insertions(+), 0 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 4b2f408e89..048299a45e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2016-04-05 H.J. Lu <hongjiu.lu@intel.com>
+ * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S: Force
+ 32-bit displacement to avoid long nop between instructions.
+
+2016-04-05 H.J. Lu <hongjiu.lu@intel.com>
+
* sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S: Add
a comment on VMOVU and VMOVA.
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index 9383517536..fe0f74516d 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -159,10 +159,23 @@ L(return):
.p2align 4
L(loop_start):
leaq (VEC_SIZE * 4)(%rdi), %rcx
+# if VEC_SIZE == 32 || VEC_SIZE == 64
+ /* Force 32-bit displacement to avoid long nop between
+ instructions. */
+ VMOVU.d32 %VEC(0), (%rdi)
+# else
VMOVU %VEC(0), (%rdi)
+# endif
andq $-(VEC_SIZE * 4), %rcx
+# if VEC_SIZE == 32
+ /* Force 32-bit displacement to avoid long nop between
+ instructions. */
+ VMOVU.d32 %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU.d32 %VEC(0), VEC_SIZE(%rdi)
+# else
VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
VMOVU %VEC(0), VEC_SIZE(%rdi)
+# endif
VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
VMOVU %VEC(0), (VEC_SIZE * 2)(%rdi)
VMOVU %VEC(0), -(VEC_SIZE * 3)(%rdi,%rdx)
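Why the two preprocessor blocks guard different stores is not spelled out in
the commit; a plausible reading is that each widened displacement adds a
fixed number of bytes (3 when a disp8 grows to disp32, 4 when a
zero-displacement operand gains one), and the mix chosen per VEC_SIZE is
just enough padding for the following code to land on its alignment boundary
without a long nop. A sketch of the disp8 vs. disp32 encodings for one of
the guarded stores, assuming VEC_SIZE == 32 and VMOVU == vmovdqu:

	vmovdqu		%ymm0, -32(%rdi,%rdx)	/* c5 fe 7f 44 17 e0: 6 bytes, disp8 */
	vmovdqu.d32	%ymm0, -32(%rdi,%rdx)	/* c5 fe 7f 84 17 e0 ff ff ff: 9 bytes */

The resulting encodings can be checked by assembling a test file and running
objdump -d on the object.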