diff options
author | hkuang <hkuang@google.com> | 2014-01-17 16:25:07 -0800 |
---|---|---|
committer | hkuang <hkuang@google.com> | 2014-01-22 13:43:20 -0800 |
commit | 97826df96b86176d8cd23d398922349950853300 (patch) | |
tree | 358fab8dd68f2aeffaaec63d02f94f926e7885c0 /vp9/common/arm | |
parent | a1d8918fd13e61753f7f420351cd7b9a2d6cc175 (diff) | |
download | libvpx-97826df96b86176d8cd23d398922349950853300.tar libvpx-97826df96b86176d8cd23d398922349950853300.tar.gz libvpx-97826df96b86176d8cd23d398922349950853300.tar.bz2 libvpx-97826df96b86176d8cd23d398922349950853300.zip |
Add tm_predictor_8x8 neon implementation.
Change-Id: I76c2720546b737cb63018a8ab6a3ff62a291786d
Diffstat (limited to 'vp9/common/arm')
-rw-r--r-- | vp9/common/arm/neon/vp9_reconintra_neon.asm | 73 |
1 files changed, 72 insertions, 1 deletions
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index 98619bb30..71bf24c9f 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -17,6 +17,7 @@
     EXPORT  |vp9_h_predictor_16x16_neon|
     EXPORT  |vp9_h_predictor_32x32_neon|
     EXPORT  |vp9_tm_predictor_4x4_neon|
+    EXPORT  |vp9_tm_predictor_8x8_neon|
     ARM
     REQUIRE8
     PRESERVE8
@@ -328,8 +329,78 @@ loop_h
     vqshrun.s16         d1, q2, #0
     vst1.32             {d0[0]}, [r0], r1
     vst1.32             {d1[0]}, [r0], r1
-
     bx                  lr
     ENDP                ; |vp9_tm_predictor_4x4_neon|
 
+;void vp9_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
+;                                const uint8_t *above,
+;                                const uint8_t *left)
+; r0  uint8_t *dst              (advanced by y_stride after each row store)
+; r1  ptrdiff_t y_stride
+; r2  const uint8_t *above      (reused as a scratch register once above is loaded)
+; r3  const uint8_t *left       (post-incremented by 1 for each row)
+
+|vp9_tm_predictor_8x8_neon| PROC
+    ; Load ytop_left = above[-1] and broadcast it to every byte lane of d0
+    sub                 r12, r2, #1
+    ldrb                r12, [r12]
+    vdup.u8             d0, r12
+
+    ; Load the 8 above pixels into d2
+    vld1.64             {d2}, [r2]
+
+    ; Compute above - ytop_left, widened to 16-bit lanes in q3
+    vsubl.u8            q3, d2, d0
+
+    ; Load left row by row; compute left + (above - ytop_left), saturate to u8
+    ; 1st row and 2nd row (r2 is overwritten here; above was already read into d2)
+    ldrb                r12, [r3], #1
+    ldrb                r2, [r3], #1
+    vdup.u16            q1, r12
+    vdup.u16            q2, r2
+    vadd.s16            q1, q1, q3
+    vadd.s16            q2, q2, q3
+    vqshrun.s16         d0, q1, #0      ; narrow s16 -> u8 with unsigned saturation
+    vqshrun.s16         d1, q2, #0
+    vst1.64             {d0}, [r0], r1  ; store one 8-pixel row, then dst += y_stride
+    vst1.64             {d1}, [r0], r1
+
+    ; 3rd row and 4th row
+    ldrb                r12, [r3], #1
+    ldrb                r2, [r3], #1
+    vdup.u16            q1, r12
+    vdup.u16            q2, r2
+    vadd.s16            q1, q1, q3
+    vadd.s16            q2, q2, q3
+    vqshrun.s16         d0, q1, #0
+    vqshrun.s16         d1, q2, #0
+    vst1.64             {d0}, [r0], r1
+    vst1.64             {d1}, [r0], r1
+
+    ; 5th row and 6th row
+    ldrb                r12, [r3], #1
+    ldrb                r2, [r3], #1
+    vdup.u16            q1, r12
+    vdup.u16            q2, r2
+    vadd.s16            q1, q1, q3
+    vadd.s16            q2, q2, q3
+    vqshrun.s16         d0, q1, #0
+    vqshrun.s16         d1, q2, #0
+    vst1.64             {d0}, [r0], r1
+    vst1.64             {d1}, [r0], r1
+
+    ; 7th row and 8th row
+    ldrb                r12, [r3], #1
+    ldrb                r2, [r3], #1
+    vdup.u16            q1, r12
+    vdup.u16            q2, r2
+    vadd.s16            q1, q1, q3
+    vadd.s16            q2, q2, q3
+    vqshrun.s16         d0, q1, #0
+    vqshrun.s16         d1, q2, #0
+    vst1.64             {d0}, [r0], r1
+    vst1.64             {d1}, [r0], r1
+    bx                  lr
+    ENDP                ; |vp9_tm_predictor_8x8_neon|
+
     END
|