diff options
author | Frank Galligan <fgalligan@google.com> | 2014-01-24 16:21:56 -0800 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2014-01-24 16:21:56 -0800 |
commit | 183361dadb7ae77c8750edcdfc37ff0b778ac124 (patch) | |
tree | c4c50d7a86b8bc86dbc9c97893ec4c3b44673325 /vp9/common/arm/neon | |
parent | 7adfa9f4cb7341f1bdddad23946d216c107a0751 (diff) | |
parent | 56a8a0b54bc7c5c85ba734672dbeee2eae7cdf10 (diff) | |
download | libvpx-183361dadb7ae77c8750edcdfc37ff0b778ac124.tar libvpx-183361dadb7ae77c8750edcdfc37ff0b778ac124.tar.gz libvpx-183361dadb7ae77c8750edcdfc37ff0b778ac124.tar.bz2 libvpx-183361dadb7ae77c8750edcdfc37ff0b778ac124.zip |
Merge "Optimize vp9_tm_predictor_8x8_neon function"
Diffstat (limited to 'vp9/common/arm/neon')
-rw-r--r-- | vp9/common/arm/neon/vp9_reconintra_neon.asm | 63 |
1 files changed, 31 insertions, 32 deletions
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index ba1aebf31..65d087af8 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -354,53 +354,52 @@ loop_h
     vsubl.u8 q3, d2, d0
 
     ; Load left row by row and compute left + (above - ytop_left)
+    vld1.u8 {d6}, [r3]
+
     ; 1st row and 2nd row
-    ldrb r12, [r3], #1
-    ldrb r2, [r3], #1
-    vdup.u16 q1, r12
-    vdup.u16 q2, r2
-    vadd.s16 q1, q1, q3
-    vadd.s16 q2, q2, q3
-    vqshrun.s16 d0, q1, #0
-    vqshrun.s16 d1, q2, #0
-    vst1.64 {d0}, [r0], r1
-    vst1.64 {d1}, [r0], r1
+    vdup.u8 d0, d6[0]
+    vdup.u8 d1, d6[1]
+    vaddw.s16 q1, q3, d0
+    vaddw.s16 q2, q3, d1
 
     ; 3rd row and 4th row
-    ldrb r12, [r3], #1
-    ldrb r2, [r3], #1
-    vdup.u16 q1, r12
-    vdup.u16 q2, r2
-    vadd.s16 q1, q1, q3
-    vadd.s16 q2, q2, q3
+    vdup.u8 d0, d6[2]
+    vdup.u8 d1, d6[3]
+    vaddw.s16 q8, q3, d0
+    vaddw.s16 q9, q3, d1
+
     vqshrun.s16 d0, q1, #0
     vqshrun.s16 d1, q2, #0
+    vqshrun.s16 d2, q8, #0
+    vqshrun.s16 d3, q9, #0
+
     vst1.64 {d0}, [r0], r1
     vst1.64 {d1}, [r0], r1
+    vst1.64 {d2}, [r0], r1
+    vst1.64 {d3}, [r0], r1
 
     ; 5th row and 6th row
-    ldrb r12, [r3], #1
-    ldrb r2, [r3], #1
-    vdup.u16 q1, r12
-    vdup.u16 q2, r2
-    vadd.s16 q1, q1, q3
-    vadd.s16 q2, q2, q3
-    vqshrun.s16 d0, q1, #0
-    vqshrun.s16 d1, q2, #0
-    vst1.64 {d0}, [r0], r1
-    vst1.64 {d1}, [r0], r1
+    vdup.u8 d0, d6[4]
+    vdup.u8 d1, d6[5]
+    vaddw.s16 q1, q3, d0
+    vaddw.s16 q2, q3, d1
 
     ; 7rd row and 8th row
-    ldrb r12, [r3], #1
-    ldrb r2, [r3], #1
-    vdup.u16 q1, r12
-    vdup.u16 q2, r2
-    vadd.s16 q1, q1, q3
-    vadd.s16 q2, q2, q3
+    vdup.u8 d0, d6[6]
+    vdup.u8 d1, d6[7]
+    vaddw.s16 q8, q3, d0
+    vaddw.s16 q9, q3, d1
+
     vqshrun.s16 d0, q1, #0
     vqshrun.s16 d1, q2, #0
+    vqshrun.s16 d2, q8, #0
+    vqshrun.s16 d3, q9, #0
+
     vst1.64 {d0}, [r0], r1
     vst1.64 {d1}, [r0], r1
+    vst1.64 {d2}, [r0], r1
+    vst1.64 {d3}, [r0], r1
+
     bx lr
     ENDP ; |vp9_tm_predictor_8x8_neon|
 