aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/powerpc/powerpc32/fpu
diff options
context:
space:
mode:
authorAnton Blanchard <anton@au1.ibm.com>2013-08-17 18:34:40 +0930
committerAlan Modra <amodra@gmail.com>2013-10-04 10:37:59 +0930
commitbe1e5d311342e08ae1f8013342df27b7ded2c156 (patch)
treeccf1ca254c3aa1392262c72de0b960eb1ec7ec21 /sysdeps/powerpc/powerpc32/fpu
parentfef13a78ea30d4c26d6bab48d731ebe864ee31b0 (diff)
downloadglibc-be1e5d311342e08ae1f8013342df27b7ded2c156.tar
glibc-be1e5d311342e08ae1f8013342df27b7ded2c156.tar.gz
glibc-be1e5d311342e08ae1f8013342df27b7ded2c156.tar.bz2
glibc-be1e5d311342e08ae1f8013342df27b7ded2c156.zip
PowerPC LE setjmp/longjmp
http://sourceware.org/ml/libc-alpha/2013-08/msg00089.html Little-endian fixes for setjmp/longjmp. When writing these I noticed the setjmp code corrupts the non volatile VMX registers when using an unaligned buffer. Anton fixed this, and also simplified it quite a bit. The current code uses boilerplate for the case where we want to store 16 bytes to an unaligned address. For that we have to do a read/modify/write of two aligned 16 byte quantities. In our case we are storing a bunch of back to back data (consective VMX registers), and only the start and end of the region need the read/modify/write. [BZ #15723] * sysdeps/powerpc/jmpbuf-offsets.h: Comment fix. * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Correct _dl_hwcap access for little-endian. * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise. Don't destroy vmx regs when saving unaligned. * sysdeps/powerpc/powerpc64/__longjmp-common.S: Correct CR load. * sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise CR save. Don't destroy vmx regs when saving unaligned.
Diffstat (limited to 'sysdeps/powerpc/powerpc32/fpu')
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S8
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/setjmp-common.S73
2 files changed, 40 insertions, 41 deletions
diff --git a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
index 9d34cd9165..d02aa57549 100644
--- a/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
+++ b/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
@@ -43,16 +43,16 @@ ENTRY (__longjmp)
# endif
mtlr r6
cfi_same_value (lr)
- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
# else
lwz r5,_dl_hwcap@got(r5)
mtlr r6
cfi_same_value (lr)
- lwz r5,4(r5)
+ lwz r5,LOWORD(r5)
# endif
# else
- lis r5,(_dl_hwcap+4)@ha
- lwz r5,(_dl_hwcap+4)@l(r5)
+ lis r5,(_dl_hwcap+LOWORD)@ha
+ lwz r5,(_dl_hwcap+LOWORD)@l(r5)
# endif
andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
beq L(no_vmx)
diff --git a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
index 46ea2b00f9..f3244060e0 100644
--- a/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
+++ b/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
@@ -94,14 +94,14 @@ ENTRY (__sigsetjmp)
# else
lwz r5,_rtld_global_ro@got(r5)
# endif
- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
# else
lwz r5,_dl_hwcap@got(r5)
- lwz r5,4(r5)
+ lwz r5,LOWORD(r5)
# endif
# else
- lis r6,(_dl_hwcap+4)@ha
- lwz r5,(_dl_hwcap+4)@l(r6)
+ lis r6,(_dl_hwcap+LOWORD)@ha
+ lwz r5,(_dl_hwcap+LOWORD)@l(r6)
# endif
andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
beq L(no_vmx)
@@ -111,44 +111,43 @@ ENTRY (__sigsetjmp)
stw r0,((JB_VRSAVE)*4)(3)
addi r6,r5,16
beq+ L(aligned_save_vmx)
- lvsr v0,0,r5
- vspltisb v1,-1 /* set v1 to all 1's */
- vspltisb v2,0 /* set v2 to all 0's */
- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes on left = misalignment */
+ lvsr v0,0,r5
+ lvsl v1,0,r5
+ addi r6,r5,-16
- /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */
- lvx v5,0,r5
- vperm v20,v20,v20,v0
- vsel v5,v5,v20,v3
- vsel v20,v20,v2,v3
- stvx v5,0,r5
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+ addi addgpr,addgpr,32; \
+ vperm tmpvr,prevvr,savevr,shiftvr; \
+ stvx tmpvr,0,savegpr
-#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
- addi addgpr,addgpr,32; \
- vperm savevr,savevr,savevr,shiftvr; \
- vsel hivr,prev_savevr,savevr,maskvr; \
- stvx hivr,0,savegpr;
+ /*
+ * We have to be careful not to corrupt the data below v20 and
+ * above v31. To keep things simple we just rotate both ends in
+ * the opposite direction to our main permute so we can use
+ * the common macro.
+ */
- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
+ /* load and rotate data below v20 */
+ lvx v2,0,r5
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+ save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+ save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+ save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+ save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+ save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+ save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+ save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+ save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+ save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+ save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+ save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+ /* load and rotate data above v31 */
+ lvx v2,0,r6
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
- /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */
- addi r5,r5,32
- vperm v31,v31,v31,v0
- lvx v4,0,r5
- vsel v5,v30,v31,v3
- stvx v5,0,r6
- vsel v4,v31,v4,v3
- stvx v4,0,r5
b L(no_vmx)
L(aligned_save_vmx):