From 7ec1221ff7a5e3faa4e58cdfeb3722b2958499e2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 3 Mar 2010 02:10:22 -0800 Subject: sparc: Use ba,a,pt in PLTs and fix bugs in R_SPARC_JMP_IREL handling. 2010-03-03 David S. Miller * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_lazy_rel): Must pass '1' for 't' argument to sparc_fixup_plt. * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_lazy_rel): Likewise. * sysdeps/sparc/sparc32/dl-plt.h (OPCODE_BA_PT): Define. (sparc_fixup_plt): Document 't' argument. Enable branch optimization and use v9 branches when possible. Explain why we cannot unconditionally patch the branch into the first PLT instruction. * sysdeps/sparc/sparc64/dl-plt.h (sparc64_fixup_plt): Document 't' argument. Use v9 branches when possible. Explain why we can in fact unconditionally use a branch in the first PLT instruction here. --- sysdeps/sparc/sparc32/dl-plt.h | 47 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) (limited to 'sysdeps/sparc/sparc32/dl-plt.h') diff --git a/sysdeps/sparc/sparc32/dl-plt.h b/sysdeps/sparc/sparc32/dl-plt.h index edcc5c1374..bfb891fe69 100644 --- a/sysdeps/sparc/sparc32/dl-plt.h +++ b/sysdeps/sparc/sparc32/dl-plt.h @@ -25,19 +25,55 @@ #define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */ #define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */ #define OPCODE_BA 0x30800000 /* b,a ?; add PC-rel word address */ +#define OPCODE_BA_PT 0x30480000 /* ba,a,pt %icc, ?; add PC-rel word address */ static inline __attribute__ ((always_inline)) Elf32_Addr sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr, Elf32_Addr value, int t, int do_flush) { - Elf32_Sword disp = value - (Elf32_Addr) reloc_addr; + Elf32_Sword disp; - if (0 && disp >= -0x800000 && disp < 0x800000) + /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap, + in which case we'll be resolving all PLT entries and thus can + optimize by overwriting instructions starting at the first PLT entry + instruction and we need not be mindful of thread safety. + + Otherwise, 't' is '1'. */ + reloc_addr += t; + disp = value - (Elf32_Addr) reloc_addr; + + if (disp >= -0x800000 && disp < 0x800000) { - /* Don't need to worry about thread safety. We're writing just one - instruction. */ + unsigned int insn = OPCODE_BA | ((disp >> 2) & 0x3fffff); + +#ifdef __sparc_v9__ + /* On V9 we can do even better by using a branch with + prediction if we fit into the even smaller 19-bit + displacement field. */ + if (disp >= -0x100000 && disp < 0x100000) + insn = OPCODE_BA_PT | ((disp >> 2) & 0x07ffff); +#endif + + /* Even if we are writing just a single branch, we must not + ignore the 't' offset. Consider a case where we have some + PLT slots which can be optimized into a single branch and + some which cannot. Then we can end up with a PLT which looks + like: + + PLT4.0: sethi %(PLT_4_INDEX), %g1 + sethi %(fully_resolved_sym_4), %g1 + jmp %g1 + %lo(fully_resolved_sym_4) + PLT5.0: ba,a fully_resolved_sym_5 + ba,a PLT0.0 + ... + + The delay slot of that jmp must always be either a sethi to + %g1 or a nop. But if we try to place this displacement + branch there, PLT4.0 will jump to fully_resolved_sym_4 for 1 + instruction and then go immediately to + fully_resolved_sym_5. */ - reloc_addr[0] = OPCODE_BA | ((disp >> 2) & 0x3fffff); + reloc_addr[0] = insn; if (do_flush) __asm __volatile ("flush %0" : : "r"(reloc_addr)); } @@ -48,7 +84,6 @@ sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr, need not be done during bootstrapping, since there are no threads. But we also can't tell if we _can_ use flush, so don't. */ - reloc_addr += t; reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff); if (do_flush) __asm __volatile ("flush %0+4" : : "r"(reloc_addr)); -- cgit v1.2.3