about | summary | refs | log | tree | commit | diff
path: root/sysdeps/x86_64/strlen.S
diff options
context:
space:
mode:
author H.J. Lu <hongjiu.lu@intel.com> 2010-08-26 22:09:34 -0700
committer Ulrich Drepper <drepper@redhat.com> 2010-08-26 22:09:34 -0700
commit 623aac7f84dfddee9bcf9d51f23612479cf672ec (patch)
tree 355c57e1d98cff706ead0832461b060bc24ffc7c /sysdeps/x86_64/strlen.S
parent b416a900856ff871c06b08fa2c9c943fd86597da (diff)
download glibc-623aac7f84dfddee9bcf9d51f23612479cf672ec.tar
glibc-623aac7f84dfddee9bcf9d51f23612479cf672ec.tar.gz
glibc-623aac7f84dfddee9bcf9d51f23612479cf672ec.tar.bz2
glibc-623aac7f84dfddee9bcf9d51f23612479cf672ec.zip
Unroll x86-64 strlen
Diffstat (limited to 'sysdeps/x86_64/strlen.S')
-rw-r--r-- sysdeps/x86_64/strlen.S 97
1 files changed, 76 insertions, 21 deletions
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index 93aee6bef1..7880c1d5e5 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,6 +1,7 @@
/* strlen(str) -- determine the length of the string STR.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@redhat.com>.
+ Modified by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -23,29 +24,83 @@
.text
ENTRY(strlen)
- pxor %xmm2, %xmm2
- movq %rdi, %rcx
- movq %rdi, %r8
- andq $~15, %rdi
- movdqa %xmm2, %xmm1
- pcmpeqb (%rdi), %xmm2
- orl $0xffffffff, %esi
- subq %rdi, %rcx
- shll %cl, %esi
- pmovmskb %xmm2, %edx
- andl %esi, %edx
- jnz 1f
-
-2: movdqa 16(%rdi), %xmm0
- leaq 16(%rdi), %rdi
+ xor %rax, %rax
+ mov %edi, %ecx
+ and $0x3f, %ecx
+ pxor %xmm0, %xmm0
+ cmp $0x30, %ecx
+ ja L(next)
+ movdqu (%rdi), %xmm1
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %edx
- testl %edx, %edx
- jz 2b
+ test %edx, %edx
+ jnz L(exit_less16)
+ mov %rdi, %rax
+ and $-16, %rax
+ jmp L(align16_start)
+L(next):
+ mov %rdi, %rax
+ and $-16, %rax
+ pcmpeqb (%rax), %xmm0
+ mov $-1, %esi
+ sub %rax, %rcx
+ shl %cl, %esi
+ pmovmskb %xmm0, %edx
+ and %esi, %edx
+ jnz L(exit)
+L(align16_start):
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
+ .p2align 4
+L(align16_loop):
+ pcmpeqb 16(%rax), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16)
-1: subq %r8, %rdi
- bsfl %edx, %eax
- addq %rdi, %rax
+ pcmpeqb 32(%rax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32)
+
+ pcmpeqb 48(%rax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48)
+
+ pcmpeqb 64(%rax), %xmm3
+ pmovmskb %xmm3, %edx
+ lea 64(%rax), %rax
+ test %edx, %edx
+ jz L(align16_loop)
+L(exit):
+ sub %rdi, %rax
+L(exit_less16):
+ bsf %rdx, %rdx
+ add %rdx, %rax
+ ret
+ .p2align 4
+L(exit16):
+ sub %rdi, %rax
+ bsf %rdx, %rdx
+ add %rdx, %rax
+ add $16, %rax
+ ret
+ .p2align 4
+L(exit32):
+ sub %rdi, %rax
+ bsf %rdx, %rdx
+ add %rdx, %rax
+ add $32, %rax
+ ret
+ .p2align 4
+L(exit48):
+ sub %rdi, %rax
+ bsf %rdx, %rdx
+ add %rdx, %rax
+ add $48, %rax
ret
END(strlen)
libc_hidden_builtin_def (strlen)