about | summary | refs | log | tree | commit | diff
path: root/sysdeps
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2018-07-18 06:39:32 -0700
committer: H.J. Lu <hjl.tools@gmail.com> 2018-07-18 06:39:46 -0700
commit: 03aaf49b68d380492a7e7459817c6e926f96f0ef (patch)
tree: 378a06756ebe1ec6f89a3020ccf2c3b25d3af704 /sysdeps
parent: 811e9e52b2bd2fb59b981b174ad30489d2574081 (diff)
download: glibc-03aaf49b68d380492a7e7459817c6e926f96f0ef.tar
glibc-03aaf49b68d380492a7e7459817c6e926f96f0ef.tar.gz
glibc-03aaf49b68d380492a7e7459817c6e926f96f0ef.tar.bz2
glibc-03aaf49b68d380492a7e7459817c6e926f96f0ef.zip
x86-64: Use _CET_NOTRACK in memcpy-ssse3.S
* sysdeps/x86_64/multiarch/memcpy-ssse3.S
(BRANCH_TO_JMPTBL_ENTRY): Add _CET_NOTRACK before indirect jump
to jump table.
(MEMCPY): Likewise.
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/x86_64/multiarch/memcpy-ssse3.S 124
1 files changed, 62 insertions, 62 deletions
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 5dd209034b..0240bfa309 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -39,7 +39,7 @@
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), INDEX; \
lea (%r11, INDEX), INDEX; \
- jmp *INDEX; \
+ _CET_NOTRACK jmp *INDEX; \
ud2
.section .text.ssse3,"ax",@progbits
@@ -86,7 +86,7 @@ L(start):
add %rdx, %rsi
add %rdx, %rdi
add %r11, %r9
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
.p2align 4
@@ -441,7 +441,7 @@ L(shl_1):
lea (L(shl_1_loop_L2)-L(shl_1_loop_L1))(%r9), %r9
L(L1_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -464,7 +464,7 @@ L(shl_1_loop_L1):
jb L(shl_1_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_end):
movaps %xmm4, -0x20(%rdi)
@@ -484,7 +484,7 @@ L(shl_1_bwd):
lea (L(shl_1_bwd_loop_L2)-L(shl_1_bwd_loop_L1))(%r9), %r9
L(L1_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -509,7 +509,7 @@ L(shl_1_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_1_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_bwd_end):
movaps %xmm4, (%rdi)
@@ -526,7 +526,7 @@ L(shl_2):
lea (L(shl_2_loop_L2)-L(shl_2_loop_L1))(%r9), %r9
L(L2_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -549,7 +549,7 @@ L(shl_2_loop_L1):
jb L(shl_2_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_end):
movaps %xmm4, -0x20(%rdi)
@@ -569,7 +569,7 @@ L(shl_2_bwd):
lea (L(shl_2_bwd_loop_L2)-L(shl_2_bwd_loop_L1))(%r9), %r9
L(L2_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -594,7 +594,7 @@ L(shl_2_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_2_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_bwd_end):
movaps %xmm4, (%rdi)
@@ -611,7 +611,7 @@ L(shl_3):
lea (L(shl_3_loop_L2)-L(shl_3_loop_L1))(%r9), %r9
L(L3_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -634,7 +634,7 @@ L(shl_3_loop_L1):
jb L(shl_3_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_end):
movaps %xmm4, -0x20(%rdi)
@@ -654,7 +654,7 @@ L(shl_3_bwd):
lea (L(shl_3_bwd_loop_L2)-L(shl_3_bwd_loop_L1))(%r9), %r9
L(L3_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -679,7 +679,7 @@ L(shl_3_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_3_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_bwd_end):
movaps %xmm4, (%rdi)
@@ -696,7 +696,7 @@ L(shl_4):
lea (L(shl_4_loop_L2)-L(shl_4_loop_L1))(%r9), %r9
L(L4_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -719,7 +719,7 @@ L(shl_4_loop_L1):
jb L(shl_4_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_end):
movaps %xmm4, -0x20(%rdi)
@@ -739,7 +739,7 @@ L(shl_4_bwd):
lea (L(shl_4_bwd_loop_L2)-L(shl_4_bwd_loop_L1))(%r9), %r9
L(L4_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -764,7 +764,7 @@ L(shl_4_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_4_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_bwd_end):
movaps %xmm4, (%rdi)
@@ -781,7 +781,7 @@ L(shl_5):
lea (L(shl_5_loop_L2)-L(shl_5_loop_L1))(%r9), %r9
L(L5_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -804,7 +804,7 @@ L(shl_5_loop_L1):
jb L(shl_5_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_end):
movaps %xmm4, -0x20(%rdi)
@@ -824,7 +824,7 @@ L(shl_5_bwd):
lea (L(shl_5_bwd_loop_L2)-L(shl_5_bwd_loop_L1))(%r9), %r9
L(L5_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -849,7 +849,7 @@ L(shl_5_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_5_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_bwd_end):
movaps %xmm4, (%rdi)
@@ -866,7 +866,7 @@ L(shl_6):
lea (L(shl_6_loop_L2)-L(shl_6_loop_L1))(%r9), %r9
L(L6_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -889,7 +889,7 @@ L(shl_6_loop_L1):
jb L(shl_6_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_end):
movaps %xmm4, -0x20(%rdi)
@@ -909,7 +909,7 @@ L(shl_6_bwd):
lea (L(shl_6_bwd_loop_L2)-L(shl_6_bwd_loop_L1))(%r9), %r9
L(L6_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -934,7 +934,7 @@ L(shl_6_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_6_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_bwd_end):
movaps %xmm4, (%rdi)
@@ -951,7 +951,7 @@ L(shl_7):
lea (L(shl_7_loop_L2)-L(shl_7_loop_L1))(%r9), %r9
L(L7_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -974,7 +974,7 @@ L(shl_7_loop_L1):
jb L(shl_7_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_end):
movaps %xmm4, -0x20(%rdi)
@@ -994,7 +994,7 @@ L(shl_7_bwd):
lea (L(shl_7_bwd_loop_L2)-L(shl_7_bwd_loop_L1))(%r9), %r9
L(L7_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1019,7 +1019,7 @@ L(shl_7_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_7_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_bwd_end):
movaps %xmm4, (%rdi)
@@ -1036,7 +1036,7 @@ L(shl_8):
lea (L(shl_8_loop_L2)-L(shl_8_loop_L1))(%r9), %r9
L(L8_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
L(shl_8_loop_L2):
prefetchnta 0x1c0(%rsi)
L(shl_8_loop_L1):
@@ -1058,7 +1058,7 @@ L(shl_8_loop_L1):
jb L(shl_8_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
.p2align 4
L(shl_8_end):
@@ -1079,7 +1079,7 @@ L(shl_8_bwd):
lea (L(shl_8_bwd_loop_L2)-L(shl_8_bwd_loop_L1))(%r9), %r9
L(L8_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_8_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1104,7 +1104,7 @@ L(shl_8_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_8_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_8_bwd_end):
movaps %xmm4, (%rdi)
@@ -1121,7 +1121,7 @@ L(shl_9):
lea (L(shl_9_loop_L2)-L(shl_9_loop_L1))(%r9), %r9
L(L9_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1144,7 +1144,7 @@ L(shl_9_loop_L1):
jb L(shl_9_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_end):
movaps %xmm4, -0x20(%rdi)
@@ -1164,7 +1164,7 @@ L(shl_9_bwd):
lea (L(shl_9_bwd_loop_L2)-L(shl_9_bwd_loop_L1))(%r9), %r9
L(L9_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1189,7 +1189,7 @@ L(shl_9_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_9_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_bwd_end):
movaps %xmm4, (%rdi)
@@ -1206,7 +1206,7 @@ L(shl_10):
lea (L(shl_10_loop_L2)-L(shl_10_loop_L1))(%r9), %r9
L(L10_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1229,7 +1229,7 @@ L(shl_10_loop_L1):
jb L(shl_10_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_end):
movaps %xmm4, -0x20(%rdi)
@@ -1249,7 +1249,7 @@ L(shl_10_bwd):
lea (L(shl_10_bwd_loop_L2)-L(shl_10_bwd_loop_L1))(%r9), %r9
L(L10_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1274,7 +1274,7 @@ L(shl_10_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_10_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_bwd_end):
movaps %xmm4, (%rdi)
@@ -1291,7 +1291,7 @@ L(shl_11):
lea (L(shl_11_loop_L2)-L(shl_11_loop_L1))(%r9), %r9
L(L11_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1314,7 +1314,7 @@ L(shl_11_loop_L1):
jb L(shl_11_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_end):
movaps %xmm4, -0x20(%rdi)
@@ -1334,7 +1334,7 @@ L(shl_11_bwd):
lea (L(shl_11_bwd_loop_L2)-L(shl_11_bwd_loop_L1))(%r9), %r9
L(L11_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1359,7 +1359,7 @@ L(shl_11_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_11_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_bwd_end):
movaps %xmm4, (%rdi)
@@ -1376,7 +1376,7 @@ L(shl_12):
lea (L(shl_12_loop_L2)-L(shl_12_loop_L1))(%r9), %r9
L(L12_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1399,7 +1399,7 @@ L(shl_12_loop_L1):
jb L(shl_12_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_end):
movaps %xmm4, -0x20(%rdi)
@@ -1419,7 +1419,7 @@ L(shl_12_bwd):
lea (L(shl_12_bwd_loop_L2)-L(shl_12_bwd_loop_L1))(%r9), %r9
L(L12_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1444,7 +1444,7 @@ L(shl_12_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_12_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_bwd_end):
movaps %xmm4, (%rdi)
@@ -1461,7 +1461,7 @@ L(shl_13):
lea (L(shl_13_loop_L2)-L(shl_13_loop_L1))(%r9), %r9
L(L13_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1484,7 +1484,7 @@ L(shl_13_loop_L1):
jb L(shl_13_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_end):
movaps %xmm4, -0x20(%rdi)
@@ -1504,7 +1504,7 @@ L(shl_13_bwd):
lea (L(shl_13_bwd_loop_L2)-L(shl_13_bwd_loop_L1))(%r9), %r9
L(L13_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1529,7 +1529,7 @@ L(shl_13_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_13_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_bwd_end):
movaps %xmm4, (%rdi)
@@ -1546,7 +1546,7 @@ L(shl_14):
lea (L(shl_14_loop_L2)-L(shl_14_loop_L1))(%r9), %r9
L(L14_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1569,7 +1569,7 @@ L(shl_14_loop_L1):
jb L(shl_14_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_end):
movaps %xmm4, -0x20(%rdi)
@@ -1589,7 +1589,7 @@ L(shl_14_bwd):
lea (L(shl_14_bwd_loop_L2)-L(shl_14_bwd_loop_L1))(%r9), %r9
L(L14_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1614,7 +1614,7 @@ L(shl_14_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_14_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_bwd_end):
movaps %xmm4, (%rdi)
@@ -1631,7 +1631,7 @@ L(shl_15):
lea (L(shl_15_loop_L2)-L(shl_15_loop_L1))(%r9), %r9
L(L15_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1654,7 +1654,7 @@ L(shl_15_loop_L1):
jb L(shl_15_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_end):
movaps %xmm4, -0x20(%rdi)
@@ -1674,7 +1674,7 @@ L(shl_15_bwd):
lea (L(shl_15_bwd_loop_L2)-L(shl_15_bwd_loop_L1))(%r9), %r9
L(L15_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1699,7 +1699,7 @@ L(shl_15_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_15_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_bwd_end):
movaps %xmm4, (%rdi)