 ChangeLog                                   | 15
 sysdeps/powerpc/powerpc64/power8/memchr.S   | 31
 sysdeps/powerpc/powerpc64/power8/memcmp.S   | 21
 sysdeps/powerpc/powerpc64/power8/memrchr.S  | 30
 sysdeps/powerpc/powerpc64/power8/memset.S   | 10
 sysdeps/powerpc/powerpc64/power8/strchr.S   | 30
 sysdeps/powerpc/powerpc64/power8/strlen.S   | 23
 sysdeps/powerpc/powerpc64/power8/strncmp.S  |  2
 sysdeps/powerpc/powerpc64/power8/strncpy.S  |  2
 sysdeps/powerpc/powerpc64/power8/strnlen.S  | 51
 sysdeps/powerpc/powerpc64/power8/strrchr.S  | 39
 sysdeps/powerpc/powerpc64/power8/strspn.S   | 35
 12 files changed, 102 insertions(+), 187 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index a8a607873d..d12147ea8c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2019-08-01  Raoni Fassina Firmino  <raoni@linux.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/power8/memchr.S: Update power8
+ mnemonics and set .machine power8.
+ * sysdeps/powerpc/powerpc64/power8/memcmp.S: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/memrchr.S: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/memset.S: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/strchr.S: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/strlen.S: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/strncmp.S: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/strncpy.S: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/strnlen.S: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/strrchr.S: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/strspn.S: Likewise.
+
2019-08-01  Adhemerval Zanella  <adhemerval.zanella@linaro.org>

	* sysdeps/hppa/fpu/libm-test-ulps: Update.
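
The macros removed throughout this commit hand-assembled ISA 2.07 (POWER8) instructions as raw .long words so the files could still be built with binutils releases that predate power8 support. As a rough illustration (not part of the commit), the following C sketch reproduces what the MTVRD/MFVRD/VBPERMQ expansions computed; the words in the comments are what a power8-aware assembler emits for the same mnemonics:

    #include <stdio.h>
    #include <stdint.h>

    /* Reproduce the retired .long macros: the base opcode word OR'd
       with register fields at the same shifts the defines used
       ((32-11) = 21, (32-16) = 16, (32-21) = 11).  */
    static uint32_t mtvrd_word (int v, int r)
    { return 0x7c000167u | (v << 21) | (r << 16); }
    static uint32_t mfvrd_word (int r, int v)
    { return 0x7c000067u | (v << 21) | (r << 16); }
    static uint32_t vbpermq_word (int t, int a, int b)
    { return 0x1000054cu | (t << 21) | (a << 16) | (b << 11); }

    int main (void)
    {
      printf ("mtvrd v1,r4       = 0x%08x\n",
              (unsigned) mtvrd_word (1, 4));      /* 0x7c240167 */
      printf ("mfvrd r5,v4       = 0x%08x\n",
              (unsigned) mfvrd_word (5, 4));      /* 0x7c850067 */
      printf ("vbpermq v6,v6,v10 = 0x%08x\n",
              (unsigned) vbpermq_word (6, 6, 10)); /* 0x10c6554c */
      return 0;
    }

With the build now requiring a binutils that understands these mnemonics, the macros can go and .machine power8 can be stated directly, which is what each hunk below does.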
diff --git a/sysdeps/powerpc/powerpc64/power8/memchr.S b/sysdeps/powerpc/powerpc64/power8/memchr.S
index 422002f256..c4a11278d6 100644
--- a/sysdeps/powerpc/powerpc64/power8/memchr.S
+++ b/sysdeps/powerpc/powerpc64/power8/memchr.S
@@ -20,21 +20,10 @@
/* void *[r3] memchr (const void *s [r3], int c [r4], size_t n [r5]) */
-/* TODO: change these to the actual instructions when the minimum required
- binutils allows it. */
-#define MTVRD(v, r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r, v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t, a, b) .long (0x1000054c \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
-
#ifndef MEMCHR
# define MEMCHR __memchr
#endif
-/* TODO: change this to .machine power8 when the minimum required binutils
- allows it. */
- .machine power7
+ .machine power8
ENTRY_TOCLESS (MEMCHR)
CALL_MCOUNT 3
dcbt 0, r3
@@ -97,7 +86,7 @@ L(align_qw):
li r0, 0
lvsl v11, r0, r0
vslb v10, v11, v10
- MTVRD(v1, r4)
+ mtvrd v1, r4
vspltb v1, v1, 7
cmpldi r5, 64
ble L(tail64)
@@ -210,10 +199,10 @@ L(tail64):
.align 4
L(found):
/* Permute the first bit of each byte into bits 48-63. */
- VBPERMQ(v6, v6, v10)
- VBPERMQ(v7, v7, v10)
- VBPERMQ(v8, v8, v10)
- VBPERMQ(v9, v9, v10)
+ vbpermq v6, v6, v10
+ vbpermq v7, v7, v10
+ vbpermq v8, v8, v10
+ vbpermq v9, v9, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v7, v7, v7, 2
@@ -228,7 +217,7 @@ L(found):
vor v11, v6, v7
vor v4, v9, v8
vor v4, v11, v4
- MFVRD(r5, v4)
+ mfvrd r5, v4
#ifdef __LITTLE_ENDIAN__
addi r6, r5, -1
andc r6, r6, r5
@@ -243,16 +232,16 @@ L(found):
.align 4
L(found_16B):
/* Permute the first bit of each byte into bits 48-63. */
- VBPERMQ(v6, v6, v10)
+ vbpermq v6, v6, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
- MFVRD(r7, v6)
+ mfvrd r7, v6
addi r6, r7, -1
andc r6, r6, r7
popcntd r6, r6
#else
vsldoi v6, v6, v6, 6
- MFVRD(r7, v6)
+ mfvrd r7, v6
cntlzd r6, r7 /* Count leading zeros before the match. */
#endif
add r3, r8, r6 /* Compute final length. */
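
In the hunks above, vbpermq packs one match bit per byte into bits 48-63 of the vector and mfvrd moves that mask into a GPR; the little-endian path then isolates the lowest set bit with addi/andc/popcntd, while big-endian counts leading zeros with cntlzd. A scalar C sketch of that final mask-to-index step (illustrative only; the mask is assumed nonzero, as on the found path):

    #include <stdint.h>

    /* LE: index of the first match is the trailing-zero count.
       (mask - 1) & ~mask sets exactly the bits below the lowest set
       bit, so its popcount equals the trailing-zero count -- the same
       addi/andc/popcntd sequence used above.  */
    static inline int first_match_le (uint64_t mask)
    { return __builtin_popcountll ((mask - 1) & ~mask); }

    /* BE: index of the first match is the leading-zero count, as
       computed by cntlzd once vsldoi has positioned the halfword.  */
    static inline int first_match_be (uint64_t mask)
    { return __builtin_clzll (mask); }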
diff --git a/sysdeps/powerpc/powerpc64/power8/memcmp.S b/sysdeps/powerpc/powerpc64/power8/memcmp.S
index db824d6494..8617d5950b 100644
--- a/sysdeps/powerpc/powerpc64/power8/memcmp.S
+++ b/sysdeps/powerpc/powerpc64/power8/memcmp.S
@@ -22,13 +22,10 @@
const char *s2 [r4],
size_t size [r5]) */
-/* TODO: change these to the actual instructions when the minimum required
- binutils allows it. */
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
#ifndef MEMCMP
# define MEMCMP memcmp
#endif
- .machine power7
+ .machine power8
ENTRY_TOCLESS (MEMCMP, 4)
CALL_MCOUNT 3
@@ -231,15 +228,15 @@ L(different2):
vperm v4, v4, v0, v9
vperm v5, v5, v0, v9
#endif
- MFVRD(r7, v4)
- MFVRD(r9, v5)
+ mfvrd r7, v4
+ mfvrd r9, v5
cmpld cr6, r7, r9
bne cr6, L(ret_diff)
/* Difference in second DW. */
vsldoi v4, v4, v4, 8
vsldoi v5, v5, v5, 8
- MFVRD(r7, v4)
- MFVRD(r9, v5)
+ mfvrd r7, v4
+ mfvrd r9, v5
cmpld cr6, r7, r9
L(ret_diff):
li rRTN, 1
@@ -256,15 +253,15 @@ L(different3):
vperm v6, v6, v0, v9
vperm v8, v8, v0, v9
#endif
- MFVRD(r7, v6)
- MFVRD(r9, v8)
+ mfvrd r7, v6
+ mfvrd r9, v8
cmpld cr6, r7, r9
bne cr6, L(ret_diff)
/* Difference in second DW. */
vsldoi v6, v6, v6, 8
vsldoi v8, v8, v8, 8
- MFVRD(r7, v6)
- MFVRD(r9, v8)
+ mfvrd r7, v6
+ mfvrd r9, v8
cmpld cr6, r7, r9
li rRTN, 1
bgtlr cr6
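
When two 16-byte chunks differ, the code above extracts each doubleword with mfvrd, compares the first pair, and only if they are equal rotates by 8 bytes with vsldoi to reach the second pair (on little-endian, a prior vperm has already put the bytes in big-endian order so an unsigned doubleword compare gives memcmp's sign). A scalar sketch of that ordering step:

    #include <stdint.h>

    /* Order two 16-byte blocks whose doublewords are already in
       big-endian byte order: compare the first DW, then fall back
       to the second, returning memcmp-style -1/0/1.  */
    static inline int cmp_dwords (uint64_t a_hi, uint64_t a_lo,
                                  uint64_t b_hi, uint64_t b_lo)
    {
      if (a_hi != b_hi)
        return a_hi > b_hi ? 1 : -1;
      if (a_lo != b_lo)
        return a_lo > b_lo ? 1 : -1;
      return 0;
    }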
diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S
index 51c6937c5f..77e5b4cc9d 100644
--- a/sysdeps/powerpc/powerpc64/power8/memrchr.S
+++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S
@@ -21,18 +21,10 @@
/* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */
-/* TODO: change these to the actual instructions when the minimum required
- binutils allows it. */
-#define MTVRD(v, r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r, v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t, a, b) .long (0x1000054c \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
#ifndef MEMRCHR
# define MEMRCHR __memrchr
#endif
- .machine power7
+ .machine power8
ENTRY_TOCLESS (MEMRCHR)
CALL_MCOUNT 3
add r7, r3, r5 /* Calculate the last acceptable address. */
@@ -92,7 +84,7 @@ L(align_qw):
li r0, 0
lvsl v11, r0, r0
vslb v10, v11, v10
- MTVRD(v1, r4)
+ mtvrd v1, r4
vspltb v1, v1, 7
cmpldi r5, 64
ble L(tail64)
@@ -205,10 +197,10 @@ L(tail64):
.align 4
L(found):
/* Permute the first bit of each byte into bits 48-63. */
- VBPERMQ(v6, v6, v10)
- VBPERMQ(v7, v7, v10)
- VBPERMQ(v8, v8, v10)
- VBPERMQ(v9, v9, v10)
+ vbpermq v6, v6, v10
+ vbpermq v7, v7, v10
+ vbpermq v8, v8, v10
+ vbpermq v9, v9, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v7, v7, v7, 2
@@ -223,7 +215,7 @@ L(found):
vor v11, v6, v7
vor v4, v9, v8
vor v4, v11, v4
- MFVRD(r5, v4)
+ mfvrd r5, v4
#ifdef __LITTLE_ENDIAN__
cntlzd r6, r5 /* Count leading zeros before the match. */
#else
@@ -245,7 +237,7 @@ L(found_16B):
bge L(last)
/* Now discard bytes before starting address. */
sub r9, r10, r8
- MTVRD(v9, r9)
+ mtvrd v9, r9
vspltisb v8, 3
/* Mask unwanted bytes. */
#ifdef __LITTLE_ENDIAN__
@@ -263,14 +255,14 @@ L(found_16B):
#endif
L(last):
/* Permute the first bit of each byte into bits 48-63. */
- VBPERMQ(v6, v6, v10)
+ vbpermq v6, v6, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v6, v6, v6, 6
- MFVRD(r7, v6)
+ mfvrd r7, v6
cntlzd r6, r7 /* Count leading zeros before the match. */
#else
- MFVRD(r7, v6)
+ mfvrd r7, v6
addi r6, r7, -1
andc r6, r6, r7
popcntd r6, r6
diff --git a/sysdeps/powerpc/powerpc64/power8/memset.S b/sysdeps/powerpc/powerpc64/power8/memset.S
index 436daf40be..3dba1f34d6 100644
--- a/sysdeps/powerpc/powerpc64/power8/memset.S
+++ b/sysdeps/powerpc/powerpc64/power8/memset.S
@@ -18,19 +18,13 @@
#include <sysdep.h>
-#define MTVSRD_V1_R4 .long 0x7c240166 /* mtvsrd v1,r4 */
-
/* void * [r3] memset (void *s [r3], int c [r4], size_t n [r5])
Returns 's'. */
#ifndef MEMSET
# define MEMSET memset
#endif
-
- /* No need to use .machine power8 since mtvsrd is already
- handled by the define. It avoid breakage on binutils
- that does not support this machine specifier. */
- .machine power7
+ .machine power8
ENTRY_TOCLESS (MEMSET, 5)
CALL_MCOUNT 3
@@ -151,7 +145,7 @@ L(tail_bytes):
vector instruction to achieve best throughput. */
L(huge_vector):
/* Replicate set byte to quadword in VMX register. */
- MTVSRD_V1_R4
+ mtvsrd v1,r4
xxpermdi 32,v0,v1,0
vspltb v2,v0,15
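
The hunk above moves the set byte into a vector register and splats it to all 16 lanes (mtvsrd, xxpermdi, vspltb). The scalar analogue of that byte replication is a multiply by 0x0101...01 (a sketch, not glibc's code):

    #include <stdint.h>

    /* Replicate byte C into every byte of a 64-bit word, the scalar
       counterpart of the mtvsrd/xxpermdi/vspltb splat above.  */
    static inline uint64_t splat_byte (uint8_t c)
    { return (uint64_t) c * 0x0101010101010101ull; }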
diff --git a/sysdeps/powerpc/powerpc64/power8/strchr.S b/sysdeps/powerpc/powerpc64/power8/strchr.S
index a81ecbbd31..0e7754ebfb 100644
--- a/sysdeps/powerpc/powerpc64/power8/strchr.S
+++ b/sysdeps/powerpc/powerpc64/power8/strchr.S
@@ -33,17 +33,7 @@
#endif /* !USE_AS_STRCHRNUL */
/* int [r3] strchr (char *s [r3], int c [r4]) */
-/* TODO: change these to the actual instructions when the minimum required
- binutils allows it. */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t,a,b) .long (0x1000054c \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
-/* TODO: change this to .machine power8 when the minimum required binutils
- allows it. */
- .machine power7
+ .machine power8
ENTRY_TOCLESS (FUNC_NAME)
CALL_MCOUNT 2
dcbt 0,r3
@@ -178,7 +168,7 @@ L(vector):
vspltisb v10, 3
lvsl v11, r0, r0
vslb v10, v11, v10
- MTVRD(v1,r4)
+ mtvrd v1, r4
li r5, 16
vspltb v1, v1, 7
/* Compare 32 bytes in each loop. */
@@ -202,10 +192,10 @@ L(continue):
blt cr6, L(no_match)
#endif
/* Permute the first bit of each byte into bits 48-63. */
- VBPERMQ(v2, v2, v10)
- VBPERMQ(v3, v3, v10)
- VBPERMQ(v6, v6, v10)
- VBPERMQ(v7, v7, v10)
+ vbpermq v2, v2, v10
+ vbpermq v3, v3, v10
+ vbpermq v6, v6, v10
+ vbpermq v7, v7, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v3, v3, v3, 2
@@ -221,7 +211,7 @@ L(continue):
vor v1, v3, v2
vor v2, v6, v7
vor v4, v1, v2
- MFVRD(r5, v4)
+ mfvrd r5, v4
#ifdef __LITTLE_ENDIAN__
addi r6, r5, -1
andc r6, r6, r5
@@ -347,8 +337,8 @@ L(continue1):
blt cr6, L(continue1)
addi r3, r3, -32
L(end1):
- VBPERMQ(v2, v2, v10)
- VBPERMQ(v3, v3, v10)
+ vbpermq v2, v2, v10
+ vbpermq v3, v3, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v3, v3, v3, 2
@@ -359,7 +349,7 @@ L(end1):
/* Merge the results and move to a GPR. */
vor v4, v3, v2
- MFVRD(r5, v4)
+ mfvrd r5, v4
#ifdef __LITTLE_ENDIAN__
addi r6, r5, -1
andc r6, r6, r5
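
The vsldoi/vor ladder in these hunks shifts the 16-bit vbpermq results by 2, 4, and 6 bytes and ORs them together so a single mfvrd can hand all 64 match bits to the GPR code at once. A scalar sketch of that merge (the lane order shown is one plausible arrangement; the actual placement depends on endianness):

    #include <stdint.h>

    /* Merge four 16-bit per-16-byte-block masks into one 64-bit word,
       mirroring the vsldoi byte shifts (2, 4, 6) and vor merges.  */
    static inline uint64_t merge_masks (uint16_t m0, uint16_t m1,
                                        uint16_t m2, uint16_t m3)
    {
      return (uint64_t) m0
             | ((uint64_t) m1 << 16)
             | ((uint64_t) m2 << 32)
             | ((uint64_t) m3 << 48);
    }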
diff --git a/sysdeps/powerpc/powerpc64/power8/strlen.S b/sysdeps/powerpc/powerpc64/power8/strlen.S
index 6436252f82..b73e014272 100644
--- a/sysdeps/powerpc/powerpc64/power8/strlen.S
+++ b/sysdeps/powerpc/powerpc64/power8/strlen.S
@@ -19,23 +19,12 @@
#include <sysdep.h>
-/* TODO: change these to the actual instructions when the minimum required
- binutils allows it. */
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t,a,b) .long (0x1000054c \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
-
/* int [r3] strlen (char *s [r3]) */
#ifndef STRLEN
# define STRLEN strlen
#endif
-
-/* TODO: change this to .machine power8 when the minimum required binutils
- allows it. */
- .machine power7
+ .machine power8
ENTRY_TOCLESS (STRLEN, 4)
CALL_MCOUNT 1
dcbt 0,r3
@@ -250,10 +239,10 @@ L(vmx_zero):
vslb v10,v11,v10
/* Permute the first bit of each byte into bits 48-63. */
- VBPERMQ(v1,v1,v10)
- VBPERMQ(v2,v2,v10)
- VBPERMQ(v3,v3,v10)
- VBPERMQ(v4,v4,v10)
+ vbpermq v1,v1,v10
+ vbpermq v2,v2,v10
+ vbpermq v3,v3,v10
+ vbpermq v4,v4,v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
@@ -270,7 +259,7 @@ L(vmx_zero):
vor v1,v2,v1
vor v2,v3,v4
vor v4,v1,v2
- MFVRD(r10,v4)
+ mfvrd r10,v4
/* Adjust address to the beginning of the current 64-byte block. */
addi r4,r4,-64
diff --git a/sysdeps/powerpc/powerpc64/power8/strncmp.S b/sysdeps/powerpc/powerpc64/power8/strncmp.S
index 40b5324a49..4a76bc516b 100644
--- a/sysdeps/powerpc/powerpc64/power8/strncmp.S
+++ b/sysdeps/powerpc/powerpc64/power8/strncmp.S
@@ -31,7 +31,7 @@
64K as default, the page cross handling assumes minimum page size of
4k. */
- .machine power7
+ .machine power8
ENTRY_TOCLESS (STRNCMP, 4)
/* Check if size is 0. */
mr. r10,r5
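
The comment retained above notes that the page-cross handling assumes a 4 KiB minimum page size: a 16-byte vector load that starts close enough to a page boundary could fault on the next page even though the string itself ends earlier. A hedged sketch of such a guard (constant and helper name are illustrative, not glibc's):

    #include <stdint.h>

    #define MIN_PAGE_SIZE 4096

    /* Nonzero if a 16-byte load at P might cross into the next
       (possibly unmapped) page, so a byte-wise path must be taken.  */
    static inline int may_cross_page (const void *p)
    { return ((uintptr_t) p & (MIN_PAGE_SIZE - 1)) > MIN_PAGE_SIZE - 16; }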
diff --git a/sysdeps/powerpc/powerpc64/power8/strncpy.S b/sysdeps/powerpc/powerpc64/power8/strncpy.S
index 01e9f70250..cb6b7c1a61 100644
--- a/sysdeps/powerpc/powerpc64/power8/strncpy.S
+++ b/sysdeps/powerpc/powerpc64/power8/strncpy.S
@@ -60,7 +60,7 @@
64K as default, the page cross handling assumes minimum page size of
4k. */
- .machine power7
+ .machine power8
#ifdef MEMSET_is_local
ENTRY_TOCLESS (FUNC_NAME, 4)
#else
diff --git a/sysdeps/powerpc/powerpc64/power8/strnlen.S b/sysdeps/powerpc/powerpc64/power8/strnlen.S
index 0a5761bfe7..a85f56cb4e 100644
--- a/sysdeps/powerpc/powerpc64/power8/strnlen.S
+++ b/sysdeps/powerpc/powerpc64/power8/strnlen.S
@@ -33,32 +33,9 @@
/* Define default page size to 4KB. */
#define PAGE_SIZE 4096
-/* The following macros implement Power ISA v2.07 opcodes
- that could not be used directly into this code to the keep
- compatibility with older binutils versions. */
-
-/* Move from vector register doubleword. */
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-
-/* Move to vector register doubleword. */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-
-/* Vector Bit Permute Quadword. */
-#define VBPERMQ(t,a,b) .long (0x1000054c \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
-
-/* Vector Population Count Halfword. */
-#define VPOPCNTH(t,b) .long (0x10000743 | ((t)<<(32-11)) | ((b)<<(32-21)))
-
-/* Vector Count Leading Zeros Halfword. */
-#define VCLZH(t,b) .long (0x10000742 | ((t)<<(32-11)) | ((b)<<(32-21)))
-
/* int [r3] strnlen (char *s [r3], size_t maxlen [r4]) */
-/* TODO: change to power8 when minimum required binutils allows it. */
- .machine power7
+ .machine power8
ENTRY_TOCLESS (__strnlen)
CALL_MCOUNT 2
dcbt 0,r3
@@ -207,7 +184,7 @@ L(smaller):
/* Place rounded up number of qw's to check into a vmx
register, and use some vector tricks to minimize
branching. */
- MTVRD(v7,r4) /* Copy maxlen from GPR to vector register. */
+	mtvrd	v7,r4		/* Copy maxlen from GPR to vector register.  */
vspltisb v5,1
vspltisb v6,15
vspltb v2,v7,7
@@ -229,20 +206,20 @@ L(loop_16B):
beq cr6,L(loop_16B) /* If null bytes not found. */
vcmpequb v1,v1,v0
- VBPERMQ(v1,v1,v10)
+ vbpermq v1,v1,v10
#ifdef __LITTLE_ENDIAN__
vsubuhm v2,v1,v5 /* Form a mask of trailing zeros. */
vandc v2,v2,v1
- VPOPCNTH(v1,v2) /* Count of trailing zeros, 16 if none. */
+	vpopcnth v1,v2	/* Count of trailing zeros, 16 if none.  */
#else
- VCLZH(v1,v1) /* Count the leading zeros, 16 if none. */
+	vclzh	v1,v1	/* Count the leading zeros, 16 if none.  */
#endif
/* Truncate to maximum allowable offset. */
vcmpgtub v2,v1,v7 /* Compare and truncate for matches beyond
maxlen. */
vsel v1,v1,v7,v2 /* 0-16 is now in byte 7. */
- MFVRD(r0,v1)
+ mfvrd r0,v1
addi r5,r5,-16 /* Undo speculative bump. */
extsb r0,r0 /* Clear whatever gunk is in the high 56b. */
add r5,r5,r0 /* Add the offset of whatever was found. */
@@ -262,10 +239,10 @@ L(found_64B):
vcmpequb v4,v4,v0
/* Permute the first bit of each byte into bits 48-63. */
- VBPERMQ(v1,v1,v10)
- VBPERMQ(v2,v2,v10)
- VBPERMQ(v3,v3,v10)
- VBPERMQ(v4,v4,v10)
+ vbpermq v1,v1,v10
+ vbpermq v2,v2,v10
+ vbpermq v3,v3,v10
+ vbpermq v4,v4,v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
@@ -286,7 +263,7 @@ L(found_64B):
/* Adjust address to the start of the current 64B block. */
addi r5,r5,-64
- MFVRD(r10,v4)
+ mfvrd r10,v4
#ifdef __LITTLE_ENDIAN__
addi r9,r10,-1 /* Form a mask from trailing zeros. */
andc r9,r9,r10
@@ -302,15 +279,15 @@ L(found_64B):
as a preparation for the 64B loop. */
.p2align 4
L(found_aligning64B):
- VBPERMQ(v1,v1,v10)
+ vbpermq v1,v1,v10
#ifdef __LITTLE_ENDIAN__
- MFVRD(r10,v1)
+ mfvrd r10,v1
addi r9,r10,-1 /* Form a mask from trailing zeros. */
andc r9,r9,r10
popcntd r0,r9 /* Count the bits in the mask. */
#else
vsldoi v1,v1,v1,6
- MFVRD(r10,v1)
+ mfvrd r10,v1
cntlzd r0,r10 /* Count leading zeros before the match. */
#endif
addi r5,r5,-16 /* Adjust address to offset of last 16 bytes
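
In the 16-byte path above, vpopcnth/vclzh yield 16 when no null is present, and the vcmpgtub/vsel pair clamps that count to the remaining maxlen so a null found past the limit cannot overshoot. The scalar equivalent is a simple min (sketch; names are illustrative):

    #include <stdint.h>

    /* What vcmpgtub v2,v1,v7 / vsel v1,v1,v7,v2 computes per lane:
       keep COUNT unless it exceeds REMAINING, else take REMAINING.  */
    static inline uint8_t clamp_to_maxlen (uint8_t count, uint8_t remaining)
    { return count > remaining ? remaining : count; }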
diff --git a/sysdeps/powerpc/powerpc64/power8/strrchr.S b/sysdeps/powerpc/powerpc64/power8/strrchr.S
index 988602d8d0..09d9f518ff 100644
--- a/sysdeps/powerpc/powerpc64/power8/strrchr.S
+++ b/sysdeps/powerpc/powerpc64/power8/strrchr.S
@@ -19,26 +19,13 @@
#include <sysdep.h>
/* char *[r3] strrchr (char *s [r3], int c [r4]) */
-/* TODO: change these to the actual instructions when the minimum required
- binutils allows it. */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t,a,b) .long (0x1000054c \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
-#define VCLZD(r,v) .long (0x100007c2 | ((r)<<(32-11)) | ((v)<<(32-21)))
-#define VPOPCNTD(r,v) .long (0x100007c3 | ((r)<<(32-11)) | ((v)<<(32-21)))
-#define VADDUQM(t,a,b) .long (0x10000100 \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
+
#ifdef __LITTLE_ENDIAN__
/* Find the match position from v6 and place result in r6. */
# define CALCULATE_MATCH() \
- VBPERMQ(v6, v6, v10); \
+ vbpermq v6, v6, v10; \
vsldoi v6, v6, v6, 6; \
- MFVRD(r7, v6); \
+ mfvrd r7, v6; \
cntlzd r6, r7; \
subfic r6, r6, 15;
/*
@@ -48,9 +35,9 @@
*/
# define FIND_NULL_POS(reg) \
vspltisb v11, -1; \
- VADDUQM(v11, reg, v11); \
+ vadduqm v11, reg, v11; \
vandc v11, v11, reg; \
- VPOPCNTD(v2, v11); \
+ vpopcntd v2, v11; \
vspltb v11, v2, 15; \
vcmpequb. v11, v11, v9; \
blt cr6, 1f; \
@@ -60,14 +47,14 @@
vsumsws v2, v2, v0;
#else
# define CALCULATE_MATCH() \
- VBPERMQ(v6, v6, v10); \
- MFVRD(r7, v6); \
+ vbpermq v6, v6, v10; \
+ mfvrd r7, v6; \
addi r6, r7, -1; \
andc r6, r6, r7; \
popcntd r6, r6; \
subfic r6, r6, 15;
# define FIND_NULL_POS(reg) \
- VCLZD(v2, reg); \
+ vclzd v2, reg; \
vspltb v11, v2, 7; \
vcmpequb. v11, v11, v9; \
blt cr6, 1f; \
@@ -80,7 +67,7 @@
#ifndef STRRCHR
# define STRRCHR strrchr
#endif
- .machine power7
+ .machine power8
ENTRY_TOCLESS (STRRCHR)
CALL_MCOUNT 2
dcbt 0,r3
@@ -243,7 +230,7 @@ L(vector):
vspltisb v10, 3
lvsl v11, r0, r0
vslb v10, v11, v10
- MTVRD(v1, r4)
+ mtvrd v1, r4
li r5, 16
vspltb v1, v1, 7
/* Compare 32 bytes in each loop. */
@@ -442,8 +429,8 @@ L(continue1):
addi r3, r3, 32
blt cr6, L(continue1)
addi r3, r3, -32
- VBPERMQ(v2, v2, v10)
- VBPERMQ(v3, v3, v10)
+ vbpermq v2, v2, v10
+ vbpermq v3, v3, v10
/* Shift each component into its correct position for merging. */
#ifdef __LITTLE_ENDIAN__
vsldoi v3, v3, v3, 2
@@ -453,7 +440,7 @@ L(continue1):
#endif
/* Merge the results and move to a GPR. */
vor v4, v3, v2
- MFVRD(r5, v4)
+ mfvrd r5, v4
#ifdef __LITTLE_ENDIAN__
addi r6, r5, -1
andc r6, r6, r5
diff --git a/sysdeps/powerpc/powerpc64/power8/strspn.S b/sysdeps/powerpc/powerpc64/power8/strspn.S
index 02bac4113e..632db2540b 100644
--- a/sysdeps/powerpc/powerpc64/power8/strspn.S
+++ b/sysdeps/powerpc/powerpc64/power8/strspn.S
@@ -52,22 +52,7 @@
#define XXVR(insn, vrt, vra, vrb) \
insn 32+vrt, 32+vra, 32+vrb
-/* ISA 2.07B instructions are not all defined for older binutils.
- Macros are defined below for these newer instructions in order
- to maintain compatibility. */
-
-/* Note, TX/SX is always set as VMX regs are the high 32 VSX regs. */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-
-#define VBPERMQ(t,a,b) .long (0x1000054c \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
-
- /* This can be updated to power8 once the minimum version of
- binutils supports power8 and the above instructions. */
- .machine power7
+ .machine power8
ENTRY_TOCLESS (STRSPN, 4)
CALL_MCOUNT 2
@@ -135,10 +120,10 @@ L(needle_gt128):
L(start_cmp):
/* Move and merge bitmap into 2 VRs. bpermd is slower on P8. */
mr r0, r3 /* Save r3 for final length computation. */
- MTVRD (v5, r5)
- MTVRD (v6, r6)
- MTVRD (v7, r7)
- MTVRD (v8, r8)
+ mtvrd v5, r5
+ mtvrd v6, r6
+ mtvrd v7, r7
+ mtvrd v8, r8
/* Continue interleaved mask generation. */
#ifdef __LITTLE_ENDIAN__
@@ -160,8 +145,8 @@ L(start_cmp):
/* Compare the first 1-16B, while masking unwanted bytes. */
clrrdi r3, r3, 4 /* Note, counts from qw boundaries. */
vxor v9, v0, v1 /* Swap high bit. */
- VBPERMQ (v8, v5, v0)
- VBPERMQ (v7, v6, v9)
+ vbpermq v8, v5, v0
+ vbpermq v7, v6, v9
vor v7, v7, v8
vor v7, v7, v11 /* Ignore non-participating bytes. */
vcmpequh. v8, v7, v4
@@ -174,8 +159,8 @@ L(vec):
lvx v0, 0, r3
addi r3, r3, 16
vxor v9, v0, v1 /* Swap high bit. */
- VBPERMQ (v8, v5, v0)
- VBPERMQ (v7, v6, v9)
+ vbpermq v8, v5, v0
+ vbpermq v7, v6, v9
vor v7, v7, v8
vcmpequh. v8, v7, v4
blt cr6, L(vec)
@@ -183,7 +168,7 @@ L(vec):
addi r3, r3, -16
L(done):
subf r3, r0, r3
- MFVRD (r10, v7)
+ mfvrd r10, v7
#ifdef __LITTLE_ENDIAN__
addi r0, r10, 1 /* Count the trailing 1's. */
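
For context on the strspn hunks: the accept set is expanded into a 256-bit bitmap split across two vector registers, and vbpermq then performs sixteen bit lookups at once per 16-byte chunk of the input. A scalar model of the same bitmap technique (a sketch under that reading, not glibc's code):

    #include <stddef.h>
    #include <stdint.h>

    /* One bit per possible byte value, set for every byte in ACCEPT;
       vbpermq does 16 of these lookups in parallel.  */
    static size_t strspn_bitmap (const char *s, const char *accept)
    {
      uint8_t map[32] = { 0 };
      for (; *accept != '\0'; accept++)
        map[(uint8_t) *accept >> 3] |= (uint8_t) (1u << ((uint8_t) *accept & 7));

      size_t n = 0;
      while (map[(uint8_t) s[n] >> 3] & (1u << ((uint8_t) s[n] & 7)))
        n++;    /* The null terminator's bit is never set, ending the scan.  */
      return n;
    }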