summaryrefslogtreecommitdiff
path: root/vp8/common/arm/neon
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/common/arm/neon')
-rw-r--r--vp8/common/arm/neon/iwalsh_neon.asm37
1 files changed, 22 insertions, 15 deletions
diff --git a/vp8/common/arm/neon/iwalsh_neon.asm b/vp8/common/arm/neon/iwalsh_neon.asm
index 01c79d937..e8ea2a619 100644
--- a/vp8/common/arm/neon/iwalsh_neon.asm
+++ b/vp8/common/arm/neon/iwalsh_neon.asm
@@ -8,7 +8,6 @@
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_short_inv_walsh4x4_neon|
- EXPORT |vp8_short_inv_walsh4x4_1_neon|
ARM
REQUIRE8
@@ -16,7 +15,7 @@
AREA |.text|, CODE, READONLY ; name this block of code
-;short vp8_short_inv_walsh4x4_neon(short *input, short *output)
+;short vp8_short_inv_walsh4x4_neon(short *input, short *mb_dqcoeff)
|vp8_short_inv_walsh4x4_neon| PROC
; read in all four lines of values: d0->d3
@@ -59,22 +58,30 @@
vshr.s16 q0, q0, #3 ;e/f >> 3
vshr.s16 q1, q1, #3 ;g/h >> 3
- vst4.i16 {d0,d1,d2,d3}, [r1@128]
+ mov r2, #64
+ add r3, r1, #32
- bx lr
- ENDP ; |vp8_short_inv_walsh4x4_neon|
+ vst1.i16 d0[0], [r1],r2
+ vst1.i16 d1[0], [r3],r2
+ vst1.i16 d2[0], [r1],r2
+ vst1.i16 d3[0], [r3],r2
+
+ vst1.i16 d0[1], [r1],r2
+ vst1.i16 d1[1], [r3],r2
+ vst1.i16 d2[1], [r1],r2
+ vst1.i16 d3[1], [r3],r2
+ vst1.i16 d0[2], [r1],r2
+ vst1.i16 d1[2], [r3],r2
+ vst1.i16 d2[2], [r1],r2
+ vst1.i16 d3[2], [r3],r2
+
+ vst1.i16 d0[3], [r1],r2
+ vst1.i16 d1[3], [r3],r2
+ vst1.i16 d2[3], [r1]
+ vst1.i16 d3[3], [r3]
-;short vp8_short_inv_walsh4x4_1_neon(short *input, short *output)
-|vp8_short_inv_walsh4x4_1_neon| PROC
- ldrsh r2, [r0] ; load input[0]
- add r3, r2, #3 ; add 3
- add r2, r1, #16 ; base for last 8 output
- asr r0, r3, #3 ; right shift 3
- vdup.16 q0, r0 ; load and duplicate
- vst1.16 {q0}, [r1@128] ; write back 8
- vst1.16 {q0}, [r2@128] ; write back last 8
bx lr
- ENDP ; |vp8_short_inv_walsh4x4_1_neon|
+ ENDP ; |vp8_short_inv_walsh4x4_neon|
END