diff options
author | Stefan Liebler <stli@linux.vnet.ibm.com> | 2016-03-31 17:37:16 +0200 |
---|---|---|
committer | Stefan Liebler <stli@linux.vnet.ibm.com> | 2016-03-31 17:37:16 +0200 |
commit | 4603c51ef7989d7eb800cdd6f42aab206f891077 (patch) | |
tree | c15dc0e82a1b8b16eac6fdef4fef8c70450921eb /sysdeps/s390/s390-64/dl-trampoline.h | |
parent | e91bd7465816f474617dcb4bbfe72f3594c5783c (diff) | |
download | glibc-4603c51ef7989d7eb800cdd6f42aab206f891077.tar glibc-4603c51ef7989d7eb800cdd6f42aab206f891077.tar.gz glibc-4603c51ef7989d7eb800cdd6f42aab206f891077.tar.bz2 glibc-4603c51ef7989d7eb800cdd6f42aab206f891077.zip |
S390: Save and restore fprs/vrs while resolving symbols.
On s390, no fpr/vrs were saved while resolving a symbol
via _dl_runtime_resolve/_dl_runtime_profile.
According to the abi, the fpr-arguments are defined as call clobbered.
In leaf-functions, gcc 4.9 and newer can use fprs for saving/restoring gprs
instead of saving them to the stack.
If gcc do this in one of the resolver-functions, then the floating point
arguments of a library-function are invalid for the first library-function-call.
Thus, this patch saves/restores the fprs around the resolving code.
The same could occur for vector registers. Furthermore an ifunc-resolver
could also clobber the vector/floating point argument registers.
Thus this patch provides the further variants _dl_runtime_resolve_vx/
_dl_runtime_profile_vx, which are used if the kernel claims, that
we run on a machine with vector registers.
Furthermore, if _dl_runtime_profile calls _dl_call_pltexit,
the pointers to inregs-/outregs-structs were setup invalid.
Now they point to the correct location in the stack-frame.
Before branching back to the caller, the return values are now
restored instead of containing the return values of the
_dl_call_pltexit() call.
On s390-32, an endless loop occurs if _dl_call_pltexit() should be called.
Now, this code-path branches to this function instead of just after the
preceding basr-instruction.
ChangeLog:
* sysdeps/s390/s390-32/dl-trampoline.S: Include dl-trampoline.h twice
to create a non-vector/vector version for _dl_runtime_resolve and
_dl_runtime_profile. Move implementation to ...
* sysdeps/s390/s390-32/dl-trampoline.h: ... here.
(_dl_runtime_resolve) Save and restore fpr/vrs.
(_dl_runtime_profile) Save and restore vrs and fix some issues
if _dl_call_pltexit is called.
* sysdeps/s390/s390-32/dl-machine.h (elf_machine_runtime_setup):
Choose the correct resolver function if running on a machine with vx.
* sysdeps/s390/s390-64/dl-trampoline.S: Include dl-trampoline.h twice
to create a non-vector/vector version for _dl_runtime_resolve and
_dl_runtime_profile. Move implementation to ...
* sysdeps/s390/s390-64/dl-trampoline.h: ... here.
(_dl_runtime_resolve) Save and restore fpr/vrs.
(_dl_runtime_profile) Save and restore vrs and fix some issues
* sysdeps/s390/s390-64/dl-machine.h: (elf_machine_runtime_setup):
Choose the correct resolver function if running on a machine with vx.
Diffstat (limited to 'sysdeps/s390/s390-64/dl-trampoline.h')
-rw-r--r-- | sysdeps/s390/s390-64/dl-trampoline.h | 211 |
1 files changed, 211 insertions, 0 deletions
diff --git a/sysdeps/s390/s390-64/dl-trampoline.h b/sysdeps/s390/s390-64/dl-trampoline.h new file mode 100644 index 0000000000..658e3a31c8 --- /dev/null +++ b/sysdeps/s390/s390-64/dl-trampoline.h @@ -0,0 +1,211 @@ +/* PLT trampolines. s390x version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The PLT stubs will call _dl_runtime_resolve/_dl_runtime_profile + * with the following linkage: + * r2 - r6 : parameter registers + * f0, f2, f4, f6 : floating point parameter registers + * v24, v26, v28, v30, v25, v27, v29, v31 : vector parameter registers + * 48(r15), 56(r15) : PLT arguments PLT1, PLT2 + * 160(r15) : additional stack parameters + * The normal clobber rules for function calls apply: + * r0 - r5 : call clobbered + * r6 - r13 : call saved + * r14 : return address (call clobbered) + * r15 : stack pointer (call saved) + * f0 - f7 : call clobbered + * f8 - f15 : call saved + * v0 - v7 : bytes 0-7 overlap with f0-f7: call clobbered + bytes 8-15: call clobbered + * v8 - v15 : bytes 0-7 overlap with f8-f15: call saved + bytes 8-15: call clobbered + * v16 - v31 : call clobbered + */ + + .globl _dl_runtime_resolve + .type _dl_runtime_resolve, @function + cfi_startproc + .align 16 +_dl_runtime_resolve: + stmg %r2,%r5,64(%r15) # save call-clobbered argument registers + cfi_offset (r2, -96) + cfi_offset (r3, -88) + cfi_offset (r4, -80) + cfi_offset (r5, -72) + std %f0,104(%r15) + cfi_offset (f0, -56) + std %f2,112(%r15) + cfi_offset (f2, -48) + std %f4,120(%r15) + cfi_offset (f4, -40) + std %f6,128(%r15) + cfi_offset (f6, -32) + stg %r14,96(15) + cfi_offset (r14, -64) + lmg %r2,%r3,48(%r15) # load args for fixup saved by PLT + lgr %r0,%r15 +#ifdef RESTORE_VRS + aghi %r15,-288 # create stack frame + cfi_adjust_cfa_offset (288) + .machine push + .machine "z13" + vstm %v24,%v31,160(%r15)# store call-clobbered vector argument registers + cfi_offset (v24, -288) + cfi_offset (v25, -272) + cfi_offset (v26, -256) + cfi_offset (v27, -240) + cfi_offset (v28, -224) + cfi_offset (v29, -208) + cfi_offset (v30, -192) + cfi_offset (v31, -176) + .machine pop +#else + aghi %r15,-160 # create stack frame + cfi_adjust_cfa_offset (160) +#endif + stg %r0,0(%r15) # write backchain + brasl %r14,_dl_fixup # call _dl_fixup + lgr %r1,%r2 # function addr returned in r2 +#ifdef RESTORE_VRS + .machine push + .machine "z13" + vlm %v24,%v31,160(%r15)# restore vector registers + .machine pop + aghi %r15,288 # remove stack frame + cfi_adjust_cfa_offset (-288) +#else + aghi %r15,160 # remove stack frame + cfi_adjust_cfa_offset (-160) +#endif + lg %r14,96(%r15) # restore registers + ld %f0,104(%r15) + ld %f2,112(%r15) + ld %f4,120(%r15) + ld %f6,128(%r15) + lmg %r2,%r5,64(%r15) + br %r1 + cfi_endproc + .size _dl_runtime_resolve, .-_dl_runtime_resolve + + +#ifndef PROF + .globl _dl_runtime_profile + .type _dl_runtime_profile, @function + cfi_startproc + .align 16 +_dl_runtime_profile: + stmg %r2,%r6,64(%r15) # save call-clobbered arg regs + cfi_offset (r2, -96) # + r6 needed as arg for + cfi_offset (r3, -88) # _dl_profile_fixup + cfi_offset (r4, -80) + cfi_offset (r5, -72) + cfi_offset (r6, -64) + std %f0,104(%r15) + cfi_offset (f0, -56) + std %f2,112(%r15) + cfi_offset (f2, -48) + std %f4,120(%r15) + cfi_offset (f4, -40) + std %f6,128(%r15) + cfi_offset (f6, -32) + stg %r12,24(%r15) # r12 is used as backup of r15 + cfi_offset (r12, -136) + stg %r14,32(%r15) + cfi_offset (r14, -128) + lgr %r12,%r15 # backup stack pointer + cfi_def_cfa_register (12) +#ifdef RESTORE_VRS + aghi %r15,-288 # create stack frame + .machine push + .machine "z13" + vstm %v24,%v31,160(%r15)# store call-clobbered vector argument registers + cfi_offset (v24, -288) + cfi_offset (v25, -272) + cfi_offset (v26, -256) + cfi_offset (v27, -240) + cfi_offset (v28, -224) + cfi_offset (v29, -208) + cfi_offset (v30, -192) + cfi_offset (v31, -176) + .machine pop +#else + aghi %r15,-160 # create stack frame +#endif + stg %r12,0(%r15) # save backchain + lmg %r2,%r3,48(%r12) # load arguments saved by PLT + lgr %r4,%r14 # return address as third parameter + la %r5,64(%r12) # pointer to struct La_s390_64_regs + la %r6,40(%r12) # long int * framesize + brasl %r14,_dl_profile_fixup # call resolver + lgr %r1,%r2 # function addr returned in r2 + ld %f0,104(%r12) # restore call-clobbered arg fprs + ld %f2,112(%r12) + ld %f4,120(%r12) + ld %f6,128(%r12) +#ifdef RESTORE_VRS + .machine push + .machine "z13" + vlm %v24,%v31,160(%r15) # restore call-clobbered arg vrs + .machine pop +#endif + lg %r0,40(%r12) # load framesize + ltgr %r0,%r0 + jnm 1f + + lmg %r2,%r6,64(%r12) # framesize < 0 means no pltexit call + # so we can do a tail call without + # copying the arg overflow area + lgr %r15,%r12 # remove stack frame + cfi_def_cfa_register (15) + lg %r14,32(%r15) # restore registers + lg %r12,24(%r15) + br %r1 # tail-call to resolved function + + cfi_def_cfa_register (12) +1: jz 4f # framesize == 0 ? + aghi %r0,7 # align framesize to 8 + nill %r0,0xfff8 + slgr %r15,%r0 # make room for framesize bytes + stg %r12,0(%r15) # save backchain + la %r2,160(%r15) + la %r3,160(%r12) + srlg %r0,%r0,3 +3: mvc 0(8,%r2),0(%r3) # copy additional parameters + la %r2,8(%r2) # depending on framesize + la %r3,8(%r3) + brctg %r0,3b +4: lmg %r2,%r6,64(%r12) # restore call-clobbered arg gprs + basr %r14,%r1 # call resolved function + stg %r2,136(%r12) # store return values r2, f0 + std %f0,144(%r12) # to struct La_s390_64_retval + lmg %r2,%r3,48(%r12) # load arguments saved by PLT + la %r4,64(%r12) # pointer to struct La_s390_64_regs + la %r5,136(%r12) # pointer to struct La_s390_64_retval + brasl %r14,_dl_call_pltexit + + lgr %r15,%r12 # remove stack frame + cfi_def_cfa_register (15) + lg %r14,32(%r15) # restore registers + lg %r12,24(%r15) + lg %r2,136(%r15) # restore return values + ld %f0,144(%r15) + br %r14 # Jump back to caller + + cfi_endproc + .size _dl_runtime_profile, .-_dl_runtime_profile +#endif |