diff options
author | Scott LaVarnway <slavarnway@google.com> | 2011-10-18 12:06:50 -0400 |
---|---|---|
committer | Scott LaVarnway <slavarnway@google.com> | 2011-10-18 12:06:50 -0400 |
commit | ed9c66f5844cd6fd28c7f92e84a769170a56462e (patch) | |
tree | 10f716283111c3765038ed596eb1dcfd6b695141 /vp8/common/arm/armv6/recon_v6.asm | |
parent | 6505adf271ff9ad79d024f63ebbdc2819e6ef259 (diff) | |
download | libvpx-ed9c66f5844cd6fd28c7f92e84a769170a56462e.tar libvpx-ed9c66f5844cd6fd28c7f92e84a769170a56462e.tar.gz libvpx-ed9c66f5844cd6fd28c7f92e84a769170a56462e.tar.bz2 libvpx-ed9c66f5844cd6fd28c7f92e84a769170a56462e.zip |
Remove usage of predict buffer for decode
Instead of using the predict buffer, the decoder now writes
the predictor into the recon buffer. For blocks with eob=0,
unnecessary idcts can be eliminated. This gave a performance
boost of ~1.8% for the HD clips used.
Tero: Added needed changes to ARM side and scheduled some
assembly code to prevent interlocks.
Patch Set 6: Merged (I1bcdca7a95aacc3a181b9faa6b10e3a71ee24df3)
into this commit because of similarities in the idct
functions.
Patch Set 7: EC bug fix.
Change-Id: Ie31d90b5d3522e1108163f2ac491e455e3f955e6
Diffstat (limited to 'vp8/common/arm/armv6/recon_v6.asm')
-rw-r--r-- | vp8/common/arm/armv6/recon_v6.asm | 281 |
1 files changed, 0 insertions, 281 deletions
diff --git a/vp8/common/arm/armv6/recon_v6.asm b/vp8/common/arm/armv6/recon_v6.asm deleted file mode 100644 index 99c7bcf2d..000000000 --- a/vp8/common/arm/armv6/recon_v6.asm +++ /dev/null @@ -1,281 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_recon_b_armv6| - EXPORT |vp8_recon2b_armv6| - EXPORT |vp8_recon4b_armv6| - - AREA |.text|, CODE, READONLY ; name this block of code -prd RN r0 -dif RN r1 -dst RN r2 -stride RN r3 - -;void recon_b(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride) -; R0 char* pred_ptr -; R1 short * dif_ptr -; R2 char * dst_ptr -; R3 int stride - -; Description: -; Loop through the block adding the Pred and Diff together. Clamp and then -; store back into the Dst. - -; Restrictions : -; all buffers are expected to be 4 byte aligned coming in and -; going out. -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -; -; -; -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp8_recon_b_armv6| PROC - stmdb sp!, {r4 - r9, lr} - - ;0, 1, 2, 3 - ldr r4, [prd], #16 ; 3 | 2 | 1 | 0 - ldr r6, [dif, #0] ; 1 | 0 - ldr r7, [dif, #4] ; 3 | 2 - - pkhbt r8, r6, r7, lsl #16 ; 2 | 0 - pkhtb r9, r7, r6, asr #16 ; 3 | 1 - - uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0 - uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1 - - usat16 r8, #8, r8 - usat16 r9, #8, r9 - add dif, dif, #32 - orr r8, r8, r9, lsl #8 - - str r8, [dst], stride - - ;0, 1, 2, 3 - ldr r4, [prd], #16 ; 3 | 2 | 1 | 0 -;; ldr r6, [dif, #8] ; 1 | 0 -;; ldr r7, [dif, #12] ; 3 | 2 - ldr r6, [dif, #0] ; 1 | 0 - ldr r7, [dif, #4] ; 3 | 2 - - pkhbt r8, r6, r7, lsl #16 ; 2 | 0 - pkhtb r9, r7, r6, asr #16 ; 3 | 1 - - uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0 - uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1 - - usat16 r8, #8, r8 - usat16 r9, #8, r9 - add dif, dif, #32 - orr r8, r8, r9, lsl #8 - - str r8, [dst], stride - - ;0, 1, 2, 3 - ldr r4, [prd], #16 ; 3 | 2 | 1 | 0 -;; ldr r6, [dif, #16] ; 1 | 0 -;; ldr r7, [dif, #20] ; 3 | 2 - ldr r6, [dif, #0] ; 1 | 0 - ldr r7, [dif, #4] ; 3 | 2 - - pkhbt r8, r6, r7, lsl #16 ; 2 | 0 - pkhtb r9, r7, r6, asr #16 ; 3 | 1 - - uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0 - uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1 - - usat16 r8, #8, r8 - usat16 r9, #8, r9 - add dif, dif, #32 - orr r8, r8, r9, lsl #8 - - str r8, [dst], stride - - ;0, 1, 2, 3 - ldr r4, [prd], #16 ; 3 | 2 | 1 | 0 -;; ldr r6, [dif, #24] ; 1 | 0 -;; ldr r7, [dif, #28] ; 3 | 2 - ldr r6, [dif, #0] ; 1 | 0 - ldr r7, [dif, #4] ; 3 | 2 - - pkhbt r8, r6, r7, lsl #16 ; 2 | 0 - pkhtb r9, r7, r6, asr #16 ; 3 | 1 - - uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0 - uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1 - - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst], stride - - ldmia sp!, {r4 - r9, pc} - - ENDP ; |recon_b| - -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -; -; -; -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -; R0 char *pred_ptr -; R1 short *dif_ptr -; R2 char *dst_ptr -; R3 int stride -|vp8_recon4b_armv6| PROC - stmdb sp!, {r4 - r9, lr} - - mov lr, #4 - -recon4b_loop - ;0, 1, 2, 3 - ldr r4, [prd], #4 ; 3 | 2 | 1 | 0 - ldr r6, [dif, #0] ; 1 | 0 - ldr r7, [dif, #4] ; 3 | 2 - - pkhbt r8, r6, r7, lsl #16 ; 2 | 0 - pkhtb r9, r7, r6, asr #16 ; 3 | 1 - - uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0 - uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1 - - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst] - - ;4, 5, 6, 7 - ldr r4, [prd], #4 -;; ldr r6, [dif, #32] -;; ldr r7, [dif, #36] - ldr r6, [dif, #8] - ldr r7, [dif, #12] - - pkhbt r8, r6, r7, lsl #16 - pkhtb r9, r7, r6, asr #16 - - uxtab16 r8, r8, r4 - uxtab16 r9, r9, r4, ror #8 - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst, #4] - - ;8, 9, 10, 11 - ldr r4, [prd], #4 -;; ldr r6, [dif, #64] -;; ldr r7, [dif, #68] - ldr r6, [dif, #16] - ldr r7, [dif, #20] - - pkhbt r8, r6, r7, lsl #16 - pkhtb r9, r7, r6, asr #16 - - uxtab16 r8, r8, r4 - uxtab16 r9, r9, r4, ror #8 - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst, #8] - - ;12, 13, 14, 15 - ldr r4, [prd], #4 -;; ldr r6, [dif, #96] -;; ldr r7, [dif, #100] - ldr r6, [dif, #24] - ldr r7, [dif, #28] - - pkhbt r8, r6, r7, lsl #16 - pkhtb r9, r7, r6, asr #16 - - uxtab16 r8, r8, r4 - uxtab16 r9, r9, r4, ror #8 - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst, #12] - - add dst, dst, stride -;; add dif, dif, #8 - add dif, dif, #32 - - subs lr, lr, #1 - bne recon4b_loop - - ldmia sp!, {r4 - r9, pc} - - ENDP ; |Recon4B| - -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -; -; -; -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -; R0 char *pred_ptr -; R1 short *dif_ptr -; R2 char *dst_ptr -; R3 int stride -|vp8_recon2b_armv6| PROC - stmdb sp!, {r4 - r9, lr} - - mov lr, #4 - -recon2b_loop - ;0, 1, 2, 3 - ldr r4, [prd], #4 - ldr r6, [dif, #0] - ldr r7, [dif, #4] - - pkhbt r8, r6, r7, lsl #16 - pkhtb r9, r7, r6, asr #16 - - uxtab16 r8, r8, r4 - uxtab16 r9, r9, r4, ror #8 - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst] - - ;4, 5, 6, 7 - ldr r4, [prd], #4 -;; ldr r6, [dif, #32] -;; ldr r7, [dif, #36] - ldr r6, [dif, #8] - ldr r7, [dif, #12] - - pkhbt r8, r6, r7, lsl #16 - pkhtb r9, r7, r6, asr #16 - - uxtab16 r8, r8, r4 - uxtab16 r9, r9, r4, ror #8 - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst, #4] - - add dst, dst, stride -;; add dif, dif, #8 - add dif, dif, #16 - - subs lr, lr, #1 - bne recon2b_loop - - ldmia sp!, {r4 - r9, pc} - - ENDP ; |Recon2B| - - END |