diff options
Diffstat (limited to 'vp8/encoder/arm/neon')
-rw-r--r-- | vp8/encoder/arm/neon/subtract_neon.asm | 14 | ||||
-rw-r--r-- | vp8/encoder/arm/neon/vp8_memcpy_neon.asm | 2 | ||||
-rw-r--r-- | vp8/encoder/arm/neon/vp8_mse16x16_neon.asm | 9 |
3 files changed, 20 insertions, 5 deletions
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm index 5bda78678..840cb33d9 100644 --- a/vp8/encoder/arm/neon/subtract_neon.asm +++ b/vp8/encoder/arm/neon/subtract_neon.asm @@ -65,8 +65,10 @@ ; unsigned char *pred, int pred_stride) |vp8_subtract_mby_neon| PROC push {r4-r7} + vpush {d8-d15} + mov r12, #4 - ldr r4, [sp, #16] ; pred_stride + ldr r4, [sp, #80] ; pred_stride mov r6, #32 ; "diff" stride x2 add r5, r0, #16 ; second diff pointer @@ -101,6 +103,7 @@ subtract_mby_loop subs r12, r12, #1 bne subtract_mby_loop + vpop {d8-d15} pop {r4-r7} bx lr ENDP @@ -112,9 +115,11 @@ subtract_mby_loop |vp8_subtract_mbuv_neon| PROC push {r4-r7} - ldr r4, [sp, #16] ; upred - ldr r5, [sp, #20] ; vpred - ldr r6, [sp, #24] ; pred_stride + vpush {d8-d15} + + ldr r4, [sp, #80] ; upred + ldr r5, [sp, #84] ; vpred + ldr r6, [sp, #88] ; pred_stride add r0, r0, #512 ; short *udiff = diff + 256; mov r12, #32 ; "diff" stride x2 add r7, r0, #16 ; second diff pointer @@ -191,6 +196,7 @@ subtract_mby_loop vst1.16 {q14}, [r0], r12 vst1.16 {q15}, [r7], r12 + vpop {d8-d15} pop {r4-r7} bx lr diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm index 5b9f11e59..d219e2d14 100644 --- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm +++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm @@ -21,6 +21,7 @@ ;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr, ; int sz); |vp8_memcpy_partial_neon| PROC + vpush {d8-d15} ;pld [r1] ;preload pred data ;pld [r1, #128] ;pld [r1, #256] @@ -64,6 +65,7 @@ extra_copy_neon_loop bne extra_copy_neon_loop done_copy_neon_loop + vpop {d8-d15} bx lr ENDP diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm index 55edbf512..f82af3ee3 100644 --- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm +++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm @@ -27,6 +27,8 @@ ;from vp8_variance(). |vp8_mse16x16_neon| PROC + vpush {q7} + vmov.i8 q7, #0 ;q7, q8, q9, q10 - sse vmov.i8 q8, #0 vmov.i8 q9, #0 @@ -62,7 +64,7 @@ mse16x16_neon_loop vadd.u32 q7, q7, q8 vadd.u32 q9, q9, q10 - ldr r12, [sp] ;load *sse from stack + ldr r12, [sp, #16] ;load *sse from stack vadd.u32 q10, q7, q9 vpaddl.u32 q1, q10 @@ -71,6 +73,7 @@ mse16x16_neon_loop vst1.32 {d0[0]}, [r12] vmov.32 r0, d0[0] + vpop {q7} bx lr ENDP @@ -82,6 +85,8 @@ mse16x16_neon_loop ; r2 unsigned char *ref_ptr, ; r3 int recon_stride |vp8_get4x4sse_cs_neon| PROC + vpush {q7} + vld1.8 {d0}, [r0], r1 ;Load up source and reference vld1.8 {d4}, [r2], r3 vld1.8 {d1}, [r0], r1 @@ -109,6 +114,8 @@ mse16x16_neon_loop vadd.u64 d0, d2, d3 vmov.32 r0, d0[0] + + vpop {q7} bx lr ENDP |