diff options
author | Noah Goldstein <goldstein.w.n@gmail.com> | 2022-11-18 16:13:32 -0800 |
---|---|---|
committer | Noah Goldstein <goldstein.w.n@gmail.com> | 2022-11-27 20:22:49 -0800 |
commit | f704192911c6c7b65a54beab3ab369fca7609a5d (patch) | |
tree | 577ab06e06659f4acafd17c290ac02605f628b49 /sysdeps/x86_64/fpu/svml_s_wrapper_impl.h | |
parent | 72f6a5a0ed25d14e6dab8f54878fd46ebaee2dd5 (diff) | |
download | glibc-f704192911c6c7b65a54beab3ab369fca7609a5d.tar glibc-f704192911c6c7b65a54beab3ab369fca7609a5d.tar.gz glibc-f704192911c6c7b65a54beab3ab369fca7609a5d.tar.bz2 glibc-f704192911c6c7b65a54beab3ab369fca7609a5d.zip |
x86/fpu: Factor out shared avx2/avx512 code in svml_{s|d}_wrapper_impl.h
Code is exactly the same for the two so better to only maintain one
version.
All math and mathvec tests pass on x86.
Diffstat (limited to 'sysdeps/x86_64/fpu/svml_s_wrapper_impl.h')
-rw-r--r-- | sysdeps/x86_64/fpu/svml_s_wrapper_impl.h | 172 |
1 file changed, 1 insertion, 171 deletions
diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h index fd9b363045..8d8e5ef7ec 100644 --- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h +++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h @@ -118,174 +118,4 @@ ret .endm -/* AVX/AVX2 ISA version as wrapper to SSE ISA version. */ -.macro WRAPPER_IMPL_AVX callee - pushq %rbp - cfi_adjust_cfa_offset (8) - cfi_rel_offset (%rbp, 0) - movq %rsp, %rbp - cfi_def_cfa_register (%rbp) - andq $-32, %rsp - subq $32, %rsp - vmovaps %ymm0, (%rsp) - vzeroupper - call HIDDEN_JUMPTARGET(\callee) - vmovaps %xmm0, (%rsp) - vmovaps 16(%rsp), %xmm0 - call HIDDEN_JUMPTARGET(\callee) - /* combine xmm0 (return of second call) with result of first - call (saved on stack). Might be worth exploring logic that - uses `vpblend` and reads in ymm1 using -16(rsp). */ - vmovaps (%rsp), %xmm1 - vinsertf128 $1, %xmm0, %ymm1, %ymm0 - movq %rbp, %rsp - cfi_def_cfa_register (%rsp) - popq %rbp - cfi_adjust_cfa_offset (-8) - cfi_restore (%rbp) - ret -.endm - -/* 2 argument AVX/AVX2 ISA version as wrapper to SSE ISA version. */ -.macro WRAPPER_IMPL_AVX_ff callee - pushq %rbp - cfi_adjust_cfa_offset (8) - cfi_rel_offset (%rbp, 0) - movq %rsp, %rbp - cfi_def_cfa_register (%rbp) - andq $-32, %rsp - subq $64, %rsp - vmovaps %ymm0, (%rsp) - vmovaps %ymm1, 32(%rsp) - vzeroupper - call HIDDEN_JUMPTARGET(\callee) - vmovaps 48(%rsp), %xmm1 - vmovaps %xmm0, (%rsp) - vmovaps 16(%rsp), %xmm0 - call HIDDEN_JUMPTARGET(\callee) - /* combine xmm0 (return of second call) with result of first - call (saved on stack). Might be worth exploring logic that - uses `vpblend` and reads in ymm1 using -16(rsp). */ - vmovaps (%rsp), %xmm1 - vinsertf128 $1, %xmm0, %ymm1, %ymm0 - movq %rbp, %rsp - cfi_def_cfa_register (%rsp) - popq %rbp - cfi_adjust_cfa_offset (-8) - cfi_restore (%rbp) - ret -.endm - -/* 3 argument AVX/AVX2 ISA version as wrapper to SSE ISA version. 
*/ -.macro WRAPPER_IMPL_AVX_fFF callee - pushq %rbp - cfi_adjust_cfa_offset (8) - cfi_rel_offset (%rbp, 0) - movq %rsp, %rbp - andq $-32, %rsp - subq $32, %rsp - vmovaps %ymm0, (%rsp) - pushq %rbx - pushq %r14 - movq %rdi, %rbx - movq %rsi, %r14 - vzeroupper - call HIDDEN_JUMPTARGET(\callee) - vmovaps 32(%rsp), %xmm0 - leaq 16(%rbx), %rdi - leaq 16(%r14), %rsi - call HIDDEN_JUMPTARGET(\callee) - popq %r14 - popq %rbx - movq %rbp, %rsp - cfi_def_cfa_register (%rsp) - popq %rbp - cfi_adjust_cfa_offset (-8) - cfi_restore (%rbp) - ret -.endm - -/* AVX512 ISA version as wrapper to AVX2 ISA version. */ -.macro WRAPPER_IMPL_AVX512 callee - pushq %rbp - cfi_adjust_cfa_offset (8) - cfi_rel_offset (%rbp, 0) - movq %rsp, %rbp - cfi_def_cfa_register (%rbp) - andq $-64, %rsp - subq $64, %rsp - vmovups %zmm0, (%rsp) - call HIDDEN_JUMPTARGET(\callee) - vmovupd %ymm0, (%rsp) - vmovupd 32(%rsp), %ymm0 - call HIDDEN_JUMPTARGET(\callee) - /* combine ymm0 (return of second call) with result of first - call (saved on stack). */ - vmovaps (%rsp), %ymm1 - vinserti64x4 $0x1, %ymm0, %zmm1, %zmm0 - movq %rbp, %rsp - cfi_def_cfa_register (%rsp) - popq %rbp - cfi_adjust_cfa_offset (-8) - cfi_restore (%rbp) - ret -.endm - -/* 2 argument AVX512 ISA version as wrapper to AVX2 ISA version. */ -.macro WRAPPER_IMPL_AVX512_ff callee - pushq %rbp - cfi_adjust_cfa_offset (8) - cfi_rel_offset (%rbp, 0) - movq %rsp, %rbp - cfi_def_cfa_register (%rbp) - andq $-64, %rsp - addq $-128, %rsp - vmovups %zmm0, (%rsp) - vmovups %zmm1, 64(%rsp) - /* ymm0 and ymm1 are already set. */ - call HIDDEN_JUMPTARGET(\callee) - vmovups 96(%rsp), %ymm1 - vmovaps %ymm0, (%rsp) - vmovups 32(%rsp), %ymm0 - call HIDDEN_JUMPTARGET(\callee) - /* combine ymm0 (return of second call) with result of first - call (saved on stack). 
*/ - vmovaps (%rsp), %ymm1 - vinserti64x4 $0x1, %ymm0, %zmm1, %zmm0 - movq %rbp, %rsp - cfi_def_cfa_register (%rsp) - popq %rbp - cfi_adjust_cfa_offset (-8) - cfi_restore (%rbp) - ret -.endm - -/* 3 argument AVX512 ISA version as wrapper to AVX2 ISA version. */ -.macro WRAPPER_IMPL_AVX512_fFF callee - pushq %rbp - cfi_adjust_cfa_offset (8) - cfi_rel_offset (%rbp, 0) - movq %rsp, %rbp - cfi_def_cfa_register (%rbp) - andq $-64, %rsp - subq $64, %rsp - vmovaps %zmm0, (%rsp) - pushq %rbx - pushq %r14 - movq %rdi, %rbx - movq %rsi, %r14 - /* ymm0 is already set. */ - call HIDDEN_JUMPTARGET(\callee) - vmovaps 48(%rsp), %ymm0 - leaq 32(%rbx), %rdi - leaq 32(%r14), %rsi - call HIDDEN_JUMPTARGET(\callee) - popq %r14 - popq %rbx - movq %rbp, %rsp - cfi_def_cfa_register (%rsp) - popq %rbp - cfi_adjust_cfa_offset (-8) - cfi_restore (%rbp) - ret -.endm +#include "svml_sd_wrapper_impl.h" |