diff options
author | Frank Galligan <fgalligan@google.com> | 2014-01-24 11:07:14 -0800 |
---|---|---|
committer | Frank Galligan <fgalligan@google.com> | 2014-01-24 11:07:14 -0800 |
commit | 56a8a0b54bc7c5c85ba734672dbeee2eae7cdf10 (patch) | |
tree | d3a73173817bb3f04f6d246450e6a25ed0a0c076 /vp9 | |
parent | 8b0537f631754c9b06a97ebc3ccc1e3f1dc05eea (diff) | |
download | libvpx-56a8a0b54bc7c5c85ba734672dbeee2eae7cdf10.tar libvpx-56a8a0b54bc7c5c85ba734672dbeee2eae7cdf10.tar.gz libvpx-56a8a0b54bc7c5c85ba734672dbeee2eae7cdf10.tar.bz2 libvpx-56a8a0b54bc7c5c85ba734672dbeee2eae7cdf10.zip |
Optimize vp9_tm_predictor_8x8_neon function
Change-Id: Ia12aae491202098ff66366145aa0c3da38dc97e5
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/arm/neon/vp9_reconintra_neon.asm | 63 |
1 files changed, 31 insertions, 32 deletions
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index 71bf24c9f..c90c26b36 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -353,53 +353,52 @@ loop_h
     vsubl.u8            q3, d2, d0
 
     ; Load left row by row and compute left + (above - ytop_left)
+    vld1.u8             {d6}, [r3]
+
     ; 1st row and 2nd row
-    ldrb                r12, [r3], #1
-    ldrb                r2, [r3], #1
-    vdup.u16            q1, r12
-    vdup.u16            q2, r2
-    vadd.s16            q1, q1, q3
-    vadd.s16            q2, q2, q3
-    vqshrun.s16         d0, q1, #0
-    vqshrun.s16         d1, q2, #0
-    vst1.64             {d0}, [r0], r1
-    vst1.64             {d1}, [r0], r1
+    vdup.u8             d0, d6[0]
+    vdup.u8             d1, d6[1]
+    vaddw.s16           q1, q3, d0
+    vaddw.s16           q2, q3, d1
 
     ; 3rd row and 4th row
-    ldrb                r12, [r3], #1
-    ldrb                r2, [r3], #1
-    vdup.u16            q1, r12
-    vdup.u16            q2, r2
-    vadd.s16            q1, q1, q3
-    vadd.s16            q2, q2, q3
+    vdup.u8             d0, d6[2]
+    vdup.u8             d1, d6[3]
+    vaddw.s16           q8, q3, d0
+    vaddw.s16           q9, q3, d1
+
     vqshrun.s16         d0, q1, #0
     vqshrun.s16         d1, q2, #0
+    vqshrun.s16         d2, q8, #0
+    vqshrun.s16         d3, q9, #0
+
     vst1.64             {d0}, [r0], r1
     vst1.64             {d1}, [r0], r1
+    vst1.64             {d2}, [r0], r1
+    vst1.64             {d3}, [r0], r1
 
     ; 5th row and 6th row
-    ldrb                r12, [r3], #1
-    ldrb                r2, [r3], #1
-    vdup.u16            q1, r12
-    vdup.u16            q2, r2
-    vadd.s16            q1, q1, q3
-    vadd.s16            q2, q2, q3
-    vqshrun.s16         d0, q1, #0
-    vqshrun.s16         d1, q2, #0
-    vst1.64             {d0}, [r0], r1
-    vst1.64             {d1}, [r0], r1
+    vdup.u8             d0, d6[4]
+    vdup.u8             d1, d6[5]
+    vaddw.s16           q1, q3, d0
+    vaddw.s16           q2, q3, d1
 
     ; 7rd row and 8th row
-    ldrb                r12, [r3], #1
-    ldrb                r2, [r3], #1
-    vdup.u16            q1, r12
-    vdup.u16            q2, r2
-    vadd.s16            q1, q1, q3
-    vadd.s16            q2, q2, q3
+    vdup.u8             d0, d6[6]
+    vdup.u8             d1, d6[7]
+    vaddw.s16           q8, q3, d0
+    vaddw.s16           q9, q3, d1
+
     vqshrun.s16         d0, q1, #0
     vqshrun.s16         d1, q2, #0
+    vqshrun.s16         d2, q8, #0
+    vqshrun.s16         d3, q9, #0
+
     vst1.64             {d0}, [r0], r1
     vst1.64             {d1}, [r0], r1
+    vst1.64             {d2}, [r0], r1
+    vst1.64             {d3}, [r0], r1
+
     bx                  lr
     ENDP  ; |vp9_tm_predictor_8x8_neon|
 