diff options
author | James Zern <jzern@google.com> | 2015-06-03 18:51:13 -0700 |
---|---|---|
committer | James Zern <jzern@google.com> | 2015-06-03 18:51:13 -0700 |
commit | 65d9599807330af74033b50f3063b2bafd539995 (patch) | |
tree | a83ca31d9a88265dc97395d03f05c2d8625fefbe /vp9/common | |
parent | 5df6c0458555dd03fd5796e9d9342d1394ade446 (diff) | |
download | libvpx-65d9599807330af74033b50f3063b2bafd539995.tar libvpx-65d9599807330af74033b50f3063b2bafd539995.tar.gz libvpx-65d9599807330af74033b50f3063b2bafd539995.tar.bz2 libvpx-65d9599807330af74033b50f3063b2bafd539995.zip |
vp9_reconintra_neon_asm/tm4x4: simplify left load
use vld1.8 {d0[]}, [r0] rather than ldrb+vdup; mildly faster
Change-Id: Ia5ffc736bcb0f5497b7d9e55a93bf5a5f5f6928c
Diffstat (limited to 'vp9/common')
-rw-r--r-- | vp9/common/arm/neon/vp9_reconintra_neon_asm.asm | 19 |
1 file changed, 9 insertions, 10 deletions
```diff
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm b/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm
index d4f6d9b48..14f574a50 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm
@@ -298,8 +298,7 @@ loop_h
 |vp9_tm_predictor_4x4_neon| PROC
     ; Load ytop_left = above[-1];
     sub                 r12, r2, #1
-    ldrb                r12, [r12]
-    vdup.u8             d0, r12
+    vld1.u8             {d0[]}, [r12]
 
     ; Load above 4 pixels
     vld1.32             {d2[0]}, [r2]
@@ -309,10 +308,10 @@ loop_h
 
     ; Load left row by row and compute left + (above - ytop_left)
     ; 1st row and 2nd row
-    ldrb                r12, [r3], #1
-    ldrb                r2, [r3], #1
-    vdup.u16            q1, r12
-    vdup.u16            q2, r2
+    vld1.u8             {d2[]}, [r3]!
+    vld1.u8             {d4[]}, [r3]!
+    vmovl.u8            q1, d2
+    vmovl.u8            q2, d4
     vadd.s16            q1, q1, q3
     vadd.s16            q2, q2, q3
     vqmovun.s16         d0, q1
@@ -321,10 +320,10 @@ loop_h
     vst1.32             {d1[0]}, [r0], r1
 
     ; 3rd row and 4th row
-    ldrb                r12, [r3], #1
-    ldrb                r2, [r3], #1
-    vdup.u16            q1, r12
-    vdup.u16            q2, r2
+    vld1.u8             {d2[]}, [r3]!
+    vld1.u8             {d4[]}, [r3]
+    vmovl.u8            q1, d2
+    vmovl.u8            q2, d4
     vadd.s16            q1, q1, q3
     vadd.s16            q2, q2, q3
     vqmovun.s16         d0, q1
```