diff options
author | Yunqing Wang <yunqingwang@google.com> | 2014-04-28 14:42:23 -0700 |
---|---|---|
committer | Yunqing Wang <yunqingwang@google.com> | 2014-04-28 14:51:53 -0700 |
commit | 33df6d1fc1d268b4901b74b4141f83594266f041 (patch) | |
tree | 87031fef033550336f5d32c3cfe55ae4cf6d2e09 /vp8/common/arm/neon/sixtappredict16x16_neon.asm | |
parent | 5ba44e37a470be7ec74f717c293cfcb864c84a0d (diff) | |
download | libvpx-33df6d1fc1d268b4901b74b4141f83594266f041.tar libvpx-33df6d1fc1d268b4901b74b4141f83594266f041.tar.gz libvpx-33df6d1fc1d268b4901b74b4141f83594266f041.tar.bz2 libvpx-33df6d1fc1d268b4901b74b4141f83594266f041.zip |
Save NEON registers in VP8 NEON functions
Recent compilers can generate optimized code that uses NEON registers
for various operations besides floating-point operations. Therefore,
saving the callee-saved registers d8 - d15 only at the beginning of the
encoder/decoder is no longer enough. This patch adds register-saving
code to the VP8 NEON functions that use those registers.
Change-Id: Ie9e44f5188cf410990c8aaaac68faceee9dffd31
Diffstat (limited to 'vp8/common/arm/neon/sixtappredict16x16_neon.asm')
-rw-r--r-- | vp8/common/arm/neon/sixtappredict16x16_neon.asm | 9 |
1 files changed, 7 insertions, 2 deletions
```diff
diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
index 9fdafd360..dd27719bf 100644
--- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
@@ -43,10 +43,11 @@ filter16_coeff
 
 |vp8_sixtap_predict16x16_neon| PROC
     push {r4-r5, lr}
+    vpush {d8-d15}
 
     adr r12, filter16_coeff
-    ldr r4, [sp, #12] ;load parameters from stack
-    ldr r5, [sp, #16] ;load parameters from stack
+    ldr r4, [sp, #76] ;load parameters from stack
+    ldr r5, [sp, #80] ;load parameters from stack
 
     cmp r2, #0 ;skip first_pass filter if xoffset=0
     beq secondpass_filter16x16_only
@@ -291,6 +292,8 @@ secondpass_inner_loop_neon
     bne filt_blk2d_sp16x16_outloop_neon
 
     add sp, sp, #336
+
+    vpop {d8-d15}
     pop {r4-r5,pc}
 
 ;--------------------
@@ -384,6 +387,7 @@ filt_blk2d_fpo16x16_loop_neon
 
     bne filt_blk2d_fpo16x16_loop_neon
 
+    vpop {d8-d15}
     pop {r4-r5,pc}
 
 ;--------------------
@@ -482,6 +486,7 @@ secondpass_only_inner_loop_neon
 
     bne filt_blk2d_spo16x16_outloop_neon
 
+    vpop {d8-d15}
     pop {r4-r5,pc}
 
     ENDP
```