diff options
author | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2021-09-10 16:52:17 +0100 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2024-01-02 15:43:30 +0000 |
commit | d3c32ae207d4fc3e48bb47ce1b9f2c6cf0f35c4f (patch) | |
tree | 8714b622e0a3a69edce25ae7bc51291d38e23d32 | |
parent | 67f371e882499ea46eca1b9dc76c98a7c2d06b69 (diff) | |
download | glibc-d3c32ae207d4fc3e48bb47ce1b9f2c6cf0f35c4f.tar glibc-d3c32ae207d4fc3e48bb47ce1b9f2c6cf0f35c4f.tar.gz glibc-d3c32ae207d4fc3e48bb47ce1b9f2c6cf0f35c4f.tar.bz2 glibc-d3c32ae207d4fc3e48bb47ce1b9f2c6cf0f35c4f.zip |
aarch64: Add SME runtime support
The runtime support routines for the call ABI of the Scalable Matrix
Extension (SME) are mostly in libgcc. Since libc.so cannot depend on
libgcc_s.so, libc carries its own implementation of __arm_za_disable for
internal use in longjmp and similar APIs.
__libc_arm_za_disable follows the same PCS rules as __arm_za_disable,
but it's a hidden symbol so it does not need variant PCS marking.
__libc_fatal is used instead of abort because it can print a message and
works in ld.so too. For now, though, the SME routines are not needed in ld.so.
To check the SME HWCAP in asm, the shared libc.so needs the offset of the
_dl_hwcap2 member within _rtld_global_ro, while libc.a accesses the
_dl_hwcap2 object directly.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
-rw-r--r-- | sysdeps/aarch64/Makefile | 10 | ||||
-rw-r--r-- | sysdeps/aarch64/__arm_za_disable.S | 112 | ||||
-rw-r--r-- | sysdeps/aarch64/rtld-global-offsets.sym | 10 |
3 files changed, 129 insertions, 3 deletions
diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile index 6a9559e5f5..9d8844d9c8 100644 --- a/sysdeps/aarch64/Makefile +++ b/sysdeps/aarch64/Makefile @@ -48,7 +48,9 @@ endif endif ifeq ($(subdir),csu) -gen-as-const-headers += tlsdesc.sym +gen-as-const-headers += \ + tlsdesc.sym \ + rtld-global-offsets.sym endif ifeq ($(subdir),gmon) @@ -62,8 +64,10 @@ endif ifeq ($(subdir),misc) sysdep_headers += sys/ifunc.h -sysdep_routines += __mtag_tag_zero_region \ - __mtag_tag_region +sysdep_routines += \ + __mtag_tag_zero_region \ + __mtag_tag_region \ + __arm_za_disable endif ifeq ($(subdir),malloc) diff --git a/sysdeps/aarch64/__arm_za_disable.S b/sysdeps/aarch64/__arm_za_disable.S new file mode 100644 index 0000000000..649891ea7f --- /dev/null +++ b/sysdeps/aarch64/__arm_za_disable.S @@ -0,0 +1,112 @@ +/* Libc internal support routine for SME. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + +#define HWCAP2_SME_BIT 23 + +/* Disable ZA. Call ABI: + - Private ZA, streaming-compatible. + - x0-x13, x19-x29, sp and fp regs are call preserved. + - On return tpidr2_el0 = 0, ZA = 0. + - Takes no argument. + - Does not return a value. + - Can abort on failure (then registers are not preserved). 
*/ + +ENTRY (__libc_arm_za_disable) + + /* Check if SME is available. */ +#ifdef SHARED + /* In libc.so. */ + adrp x14, :got:_rtld_global_ro + ldr x14, [x14, :got_lo12:_rtld_global_ro] + ldr x14, [x14, GLRO_DL_HWCAP2_OFFSET] +#else + /* In libc.a, may be PIC. */ + adrp x14, _dl_hwcap2 + ldr x14, [x14, :lo12:_dl_hwcap2] +#endif + /* x14 now holds the hwcap2 word loaded above; return straight away when its SME bit is clear. */ tbz x14, HWCAP2_SME_BIT, L(end) + + .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ + /* Return without doing anything further when tpidr2_el0 reads as zero. */ cbz x14, L(end) + + /* Check reserved bytes, abort on unknown extensions. The halfword at offset 10 and the word at offset 12 of the block that x14 points to are OR-ed together; any set bit branches to L(fail). */ + ldrh w15, [x14, 10] + ldr w16, [x14, 12] + orr w15, w15, w16 + cbnz w15, L(fail) + + /* Load the save buffer pointer from offset 0 and the slice count from offset 8; return if either one is zero. */ ldr x16, [x14] + cbz x16, L(end) + ldrh w17, [x14, 8] + cbz w17, L(end) + + /* x14: tpidr2, x15: 0 (the cbnz above fell through), + x16: za_save_buffer, x17: num_za_save_slices. */ + +L(save_loop): + .inst 0xe1206200 /* str za[w15, 0], [x16] */ + .inst 0xe1206201 /* str za[w15, 1], [x16, 1, mul vl] */ + .inst 0xe1206202 /* str za[w15, 2], [x16, 2, mul vl] */ + .inst 0xe1206203 /* str za[w15, 3], [x16, 3, mul vl] */ + .inst 0xe1206204 /* str za[w15, 4], [x16, 4, mul vl] */ + .inst 0xe1206205 /* str za[w15, 5], [x16, 5, mul vl] */ + .inst 0xe1206206 /* str za[w15, 6], [x16, 6, mul vl] */ + .inst 0xe1206207 /* str za[w15, 7], [x16, 7, mul vl] */ + .inst 0xe1206208 /* str za[w15, 8], [x16, 8, mul vl] */ + .inst 0xe1206209 /* str za[w15, 9], [x16, 9, mul vl] */ + .inst 0xe120620a /* str za[w15, 10], [x16, 10, mul vl] */ + .inst 0xe120620b /* str za[w15, 11], [x16, 11, mul vl] */ + .inst 0xe120620c /* str za[w15, 12], [x16, 12, mul vl] */ + .inst 0xe120620d /* str za[w15, 13], [x16, 13, mul vl] */ + .inst 0xe120620e /* str za[w15, 14], [x16, 14, mul vl] */ + .inst 0xe120620f /* str za[w15, 15], [x16, 15, mul vl] */ + /* Sixteen slices were stored above, so step the slice index in w15 by 16 and advance the buffer pointer in x16 with addsvl by 16. */ add w15, w15, 16 + .inst 0x04305a10 /* addsvl x16, x16, 16 */ + /* bhi is an unsigned comparison: keep looping while num_za_save_slices (w17) is above the running slice index (w15). */ cmp w17, w15 + bhi L(save_loop) + /* Saving is done: clear tpidr2_el0, then stop ZA. */ .inst 0xd51bd0bf /* msr tpidr2_el0, xzr */ + .inst 0xd503447f /* smstop za */ +L(end): + ret +L(fail): /* Failure path: build a 32-byte frame, record the CFI for it, then call __libc_fatal with L(msg). */ +#if HAVE_AARCH64_PAC_RET + PACIASP + cfi_window_save +#endif + stp x29, x30, [sp, -32]! 
+ cfi_adjust_cfa_offset (32) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) + mov x29, sp + .inst 0x04e0e3f0 /* cntd x16 */ + /* Keep the cntd result in the frame at sp+16; the CFI below describes that slot as DWARF register 46 (presumably VG - TODO confirm). */ str x16, [sp, 16] + cfi_rel_offset (46, 16) + .inst 0xd503467f /* smstop */ + /* x0 = address of L(msg), the argument passed to __libc_fatal. No ret follows the bl, so __libc_fatal is presumably expected not to return - confirm. */ adrp x0, L(msg) + add x0, x0, :lo12:L(msg) + bl HIDDEN_JUMPTARGET (__libc_fatal) +END (__libc_arm_za_disable) + + .section .rodata + .align 3 +L(msg): + .string "FATAL: __libc_arm_za_disable failed.\n" diff --git a/sysdeps/aarch64/rtld-global-offsets.sym b/sysdeps/aarch64/rtld-global-offsets.sym new file mode 100644 index 0000000000..23cdaf7d9e --- /dev/null +++ b/sysdeps/aarch64/rtld-global-offsets.sym @@ -0,0 +1,10 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) + +-- Offsets of _rtld_global_ro in libc.so + +GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) +GLRO_DL_HWCAP2_OFFSET GLRO_offsetof (dl_hwcap2) |