aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
diff options
context:
space:
mode:
authorNoah Goldstein <goldstein.w.n@gmail.com>2022-11-18 16:13:32 -0800
committerNoah Goldstein <goldstein.w.n@gmail.com>2022-11-27 20:22:49 -0800
commitf704192911c6c7b65a54beab3ab369fca7609a5d (patch)
tree577ab06e06659f4acafd17c290ac02605f628b49 /sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
parent72f6a5a0ed25d14e6dab8f54878fd46ebaee2dd5 (diff)
downloadglibc-f704192911c6c7b65a54beab3ab369fca7609a5d.tar
glibc-f704192911c6c7b65a54beab3ab369fca7609a5d.tar.gz
glibc-f704192911c6c7b65a54beab3ab369fca7609a5d.tar.bz2
glibc-f704192911c6c7b65a54beab3ab369fca7609a5d.zip
x86/fpu: Factor out shared avx2/avx512 code in svml_{s|d}_wrapper_impl.h
Code is exactly the same for the two so better to only maintain one version. All math and mathvec tests pass on x86.
Diffstat (limited to 'sysdeps/x86_64/fpu/svml_s_wrapper_impl.h')
-rw-r--r--sysdeps/x86_64/fpu/svml_s_wrapper_impl.h172
1 file changed, 1 insertion, 171 deletions
diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
index fd9b363045..8d8e5ef7ec 100644
--- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
@@ -118,174 +118,4 @@
ret
.endm
-/* AVX/AVX2 ISA version as wrapper to SSE ISA version. */
-.macro WRAPPER_IMPL_AVX callee
- pushq %rbp
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (%rbp, 0)
- movq %rsp, %rbp
- cfi_def_cfa_register (%rbp)
- andq $-32, %rsp
- subq $32, %rsp
- vmovaps %ymm0, (%rsp)
- vzeroupper
- call HIDDEN_JUMPTARGET(\callee)
- vmovaps %xmm0, (%rsp)
- vmovaps 16(%rsp), %xmm0
- call HIDDEN_JUMPTARGET(\callee)
- /* combine xmm0 (return of second call) with result of first
- call (saved on stack). Might be worth exploring logic that
- uses `vpblend` and reads in ymm1 using -16(rsp). */
- vmovaps (%rsp), %xmm1
- vinsertf128 $1, %xmm0, %ymm1, %ymm0
- movq %rbp, %rsp
- cfi_def_cfa_register (%rsp)
- popq %rbp
- cfi_adjust_cfa_offset (-8)
- cfi_restore (%rbp)
- ret
-.endm
-
-/* 2 argument AVX/AVX2 ISA version as wrapper to SSE ISA version. */
-.macro WRAPPER_IMPL_AVX_ff callee
- pushq %rbp
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (%rbp, 0)
- movq %rsp, %rbp
- cfi_def_cfa_register (%rbp)
- andq $-32, %rsp
- subq $64, %rsp
- vmovaps %ymm0, (%rsp)
- vmovaps %ymm1, 32(%rsp)
- vzeroupper
- call HIDDEN_JUMPTARGET(\callee)
- vmovaps 48(%rsp), %xmm1
- vmovaps %xmm0, (%rsp)
- vmovaps 16(%rsp), %xmm0
- call HIDDEN_JUMPTARGET(\callee)
- /* combine xmm0 (return of second call) with result of first
- call (saved on stack). Might be worth exploring logic that
- uses `vpblend` and reads in ymm1 using -16(rsp). */
- vmovaps (%rsp), %xmm1
- vinsertf128 $1, %xmm0, %ymm1, %ymm0
- movq %rbp, %rsp
- cfi_def_cfa_register (%rsp)
- popq %rbp
- cfi_adjust_cfa_offset (-8)
- cfi_restore (%rbp)
- ret
-.endm
-
-/* 3 argument AVX/AVX2 ISA version as wrapper to SSE ISA version. */
-.macro WRAPPER_IMPL_AVX_fFF callee
- pushq %rbp
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (%rbp, 0)
- movq %rsp, %rbp
- andq $-32, %rsp
- subq $32, %rsp
- vmovaps %ymm0, (%rsp)
- pushq %rbx
- pushq %r14
- movq %rdi, %rbx
- movq %rsi, %r14
- vzeroupper
- call HIDDEN_JUMPTARGET(\callee)
- vmovaps 32(%rsp), %xmm0
- leaq 16(%rbx), %rdi
- leaq 16(%r14), %rsi
- call HIDDEN_JUMPTARGET(\callee)
- popq %r14
- popq %rbx
- movq %rbp, %rsp
- cfi_def_cfa_register (%rsp)
- popq %rbp
- cfi_adjust_cfa_offset (-8)
- cfi_restore (%rbp)
- ret
-.endm
-
-/* AVX512 ISA version as wrapper to AVX2 ISA version. */
-.macro WRAPPER_IMPL_AVX512 callee
- pushq %rbp
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (%rbp, 0)
- movq %rsp, %rbp
- cfi_def_cfa_register (%rbp)
- andq $-64, %rsp
- subq $64, %rsp
- vmovups %zmm0, (%rsp)
- call HIDDEN_JUMPTARGET(\callee)
- vmovupd %ymm0, (%rsp)
- vmovupd 32(%rsp), %ymm0
- call HIDDEN_JUMPTARGET(\callee)
- /* combine ymm0 (return of second call) with result of first
- call (saved on stack). */
- vmovaps (%rsp), %ymm1
- vinserti64x4 $0x1, %ymm0, %zmm1, %zmm0
- movq %rbp, %rsp
- cfi_def_cfa_register (%rsp)
- popq %rbp
- cfi_adjust_cfa_offset (-8)
- cfi_restore (%rbp)
- ret
-.endm
-
-/* 2 argument AVX512 ISA version as wrapper to AVX2 ISA version. */
-.macro WRAPPER_IMPL_AVX512_ff callee
- pushq %rbp
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (%rbp, 0)
- movq %rsp, %rbp
- cfi_def_cfa_register (%rbp)
- andq $-64, %rsp
- addq $-128, %rsp
- vmovups %zmm0, (%rsp)
- vmovups %zmm1, 64(%rsp)
- /* ymm0 and ymm1 are already set. */
- call HIDDEN_JUMPTARGET(\callee)
- vmovups 96(%rsp), %ymm1
- vmovaps %ymm0, (%rsp)
- vmovups 32(%rsp), %ymm0
- call HIDDEN_JUMPTARGET(\callee)
- /* combine ymm0 (return of second call) with result of first
- call (saved on stack). */
- vmovaps (%rsp), %ymm1
- vinserti64x4 $0x1, %ymm0, %zmm1, %zmm0
- movq %rbp, %rsp
- cfi_def_cfa_register (%rsp)
- popq %rbp
- cfi_adjust_cfa_offset (-8)
- cfi_restore (%rbp)
- ret
-.endm
-
-/* 3 argument AVX512 ISA version as wrapper to AVX2 ISA version. */
-.macro WRAPPER_IMPL_AVX512_fFF callee
- pushq %rbp
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (%rbp, 0)
- movq %rsp, %rbp
- cfi_def_cfa_register (%rbp)
- andq $-64, %rsp
- subq $64, %rsp
- vmovaps %zmm0, (%rsp)
- pushq %rbx
- pushq %r14
- movq %rdi, %rbx
- movq %rsi, %r14
- /* ymm0 is already set. */
- call HIDDEN_JUMPTARGET(\callee)
- vmovaps 48(%rsp), %ymm0
- leaq 32(%rbx), %rdi
- leaq 32(%r14), %rsi
- call HIDDEN_JUMPTARGET(\callee)
- popq %r14
- popq %rbx
- movq %rbp, %rsp
- cfi_def_cfa_register (%rsp)
- popq %rbp
- cfi_adjust_cfa_offset (-8)
- cfi_restore (%rbp)
- ret
-.endm
+#include "svml_sd_wrapper_impl.h"