summaryrefslogtreecommitdiff
path: root/vp8/common/arm/armv6/recon_v6.asm
diff options
context:
space:
mode:
author Scott LaVarnway <slavarnway@google.com> 2011-10-18 12:06:50 -0400
committer Scott LaVarnway <slavarnway@google.com> 2011-10-18 12:06:50 -0400
commit ed9c66f5844cd6fd28c7f92e84a769170a56462e (patch)
tree 10f716283111c3765038ed596eb1dcfd6b695141 /vp8/common/arm/armv6/recon_v6.asm
parent 6505adf271ff9ad79d024f63ebbdc2819e6ef259 (diff)
download libvpx-ed9c66f5844cd6fd28c7f92e84a769170a56462e.tar
libvpx-ed9c66f5844cd6fd28c7f92e84a769170a56462e.tar.gz
libvpx-ed9c66f5844cd6fd28c7f92e84a769170a56462e.tar.bz2
libvpx-ed9c66f5844cd6fd28c7f92e84a769170a56462e.zip
Remove usage of predict buffer for decode
Instead of using the predict buffer, the decoder now writes the predictor into the recon buffer. For blocks with eob=0, unnecessary idcts can be eliminated. This gave a performance boost of ~1.8% for the HD clips used. Tero: Added needed changes to ARM side and scheduled some assembly code to prevent interlocks. Patch Set 6: Merged (I1bcdca7a95aacc3a181b9faa6b10e3a71ee24df3) into this commit because of similarities in the idct functions. Patch Set 7: EC bug fix. Change-Id: Ie31d90b5d3522e1108163f2ac491e455e3f955e6
Diffstat (limited to 'vp8/common/arm/armv6/recon_v6.asm')
-rw-r--r-- vp8/common/arm/armv6/recon_v6.asm 281
1 files changed, 0 insertions, 281 deletions
diff --git a/vp8/common/arm/armv6/recon_v6.asm b/vp8/common/arm/armv6/recon_v6.asm
deleted file mode 100644
index 99c7bcf2d..000000000
--- a/vp8/common/arm/armv6/recon_v6.asm
+++ /dev/null
@@ -1,281 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_recon_b_armv6| ; 4 rows, one 4-byte word of pixels per row
- EXPORT |vp8_recon2b_armv6| ; 4 rows, two words (8 pixels) per row
- EXPORT |vp8_recon4b_armv6| ; 4 rows, four words (16 pixels) per row
-; Code section and symbolic register names (r0-r3) shared by the three routines in this file.
- AREA |.text|, CODE, READONLY ; name this block of code
-prd RN r0 ; predictor byte pointer (first argument)
-dif RN r1 ; diff halfword pointer (second argument)
-dst RN r2 ; destination byte pointer (third argument)
-stride RN r3 ; byte step added to dst after each row (fourth argument)
-
-;void vp8_recon_b_armv6(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride)
-; R0 char* pred_ptr   (prd) one word (4 pixels) is read per row, then prd += 16
-; R1 short * dif_ptr  (dif) two words (4 halfwords) are read per row, then dif += 32
-; R2 char * dst_ptr   (dst) one word (4 pixels) is written per row, then dst += stride
-; R3 int stride       (stride) byte step applied to dst only
-
-; Description:
-; Reconstructs 4 rows of 4 pixels. For every pixel the Pred byte is added to the
-; matching 16-bit Diff, the sum is clamped to 0..255 and stored back into the Dst.
-; The four rows are fully unrolled; there is no loop and no return value.
-; Restrictions :
-; all buffers are expected to be 4 byte aligned coming in and
-; going out (every load and store below is a whole word).
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-; Registers: r4 = pred word, r6/r7 = diff words, r8/r9 = even/odd lane sums.
-; r4-r9 and lr are saved on entry and restored on exit (r5 is saved but never used).
-; Lane labels "3 | 2 | 1 | 0" run from most to least significant part of a register.
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-|vp8_recon_b_armv6| PROC
- stmdb sp!, {r4 - r9, lr}
-
- ;first row: pixels 0, 1, 2, 3
- ldr r4, [prd], #16 ; r4 = pred bytes 3 | 2 | 1 | 0, then prd += 16
- ldr r6, [dif, #0] ; r6 = diff halfwords 1 | 0
- ldr r7, [dif, #4] ; r7 = diff halfwords 3 | 2
-
- pkhbt r8, r6, r7, lsl #16 ; r8 = diff 2 | 0 (even lanes)
- pkhtb r9, r7, r6, asr #16 ; r9 = diff 3 | 1 (odd lanes)
-
- uxtab16 r8, r8, r4 ; diff 2 | 0 + pred bytes 2 | 0 taken from 3 | 2 | 1 | 0
- uxtab16 r9, r9, r4, ror #8 ; diff 3 | 1 + pred bytes 3 | 1 taken from 0 | 3 | 2 | 1
-
- usat16 r8, #8, r8 ; clamp both halfwords to 0..255
- usat16 r9, #8, r9 ; clamp both halfwords to 0..255
- add dif, dif, #32 ; step dif to the next row of diffs
- orr r8, r8, r9, lsl #8 ; interleave lanes back into bytes 3 | 2 | 1 | 0
-
- str r8, [dst], stride ; write the row, then dst += stride
-
- ;second row: pixels 0, 1, 2, 3 (same sequence as the first row)
- ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
-;; ldr r6, [dif, #8] ; 1 | 0
-;; ldr r7, [dif, #12] ; 3 | 2
- ldr r6, [dif, #0] ; 1 | 0
- ldr r7, [dif, #4] ; 3 | 2
-
- pkhbt r8, r6, r7, lsl #16 ; 2 | 0
- pkhtb r9, r7, r6, asr #16 ; 3 | 1
-
- uxtab16 r8, r8, r4 ; 2 | 0 + bytes 2 | 0 of 3 | 2 | 1 | 0
- uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + bytes 3 | 1 of 0 | 3 | 2 | 1
-
- usat16 r8, #8, r8
- usat16 r9, #8, r9
- add dif, dif, #32
- orr r8, r8, r9, lsl #8
-
- str r8, [dst], stride
-
- ;third row: pixels 0, 1, 2, 3
- ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
-;; ldr r6, [dif, #16] ; 1 | 0
-;; ldr r7, [dif, #20] ; 3 | 2
- ldr r6, [dif, #0] ; 1 | 0
- ldr r7, [dif, #4] ; 3 | 2
-
- pkhbt r8, r6, r7, lsl #16 ; 2 | 0
- pkhtb r9, r7, r6, asr #16 ; 3 | 1
-
- uxtab16 r8, r8, r4 ; 2 | 0 + bytes 2 | 0 of 3 | 2 | 1 | 0
- uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + bytes 3 | 1 of 0 | 3 | 2 | 1
-
- usat16 r8, #8, r8
- usat16 r9, #8, r9
- add dif, dif, #32
- orr r8, r8, r9, lsl #8
-
- str r8, [dst], stride
-
- ;fourth row: pixels 0, 1, 2, 3 (dif is not advanced again after this row)
- ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
-;; ldr r6, [dif, #24] ; 1 | 0
-;; ldr r7, [dif, #28] ; 3 | 2
- ldr r6, [dif, #0] ; 1 | 0
- ldr r7, [dif, #4] ; 3 | 2
-
- pkhbt r8, r6, r7, lsl #16 ; 2 | 0
- pkhtb r9, r7, r6, asr #16 ; 3 | 1
-
- uxtab16 r8, r8, r4 ; 2 | 0 + bytes 2 | 0 of 3 | 2 | 1 | 0
- uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + bytes 3 | 1 of 0 | 3 | 2 | 1
-
- usat16 r8, #8, r8
- usat16 r9, #8, r9
- orr r8, r8, r9, lsl #8
-
- str r8, [dst], stride
-
- ldmia sp!, {r4 - r9, pc} ; restore saved registers; loading pc returns to the caller
-
- ENDP ; |vp8_recon_b_armv6|
-
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-; vp8_recon4b_armv6: 4 rows of 16 pixels. Each pixel = Pred byte + 16-bit Diff,
-; clamped to 0..255. One loop pass handles a row as four words; lr counts the rows.
-; Per row: prd advances 16 bytes (4 loads of 4), dif += 32, dst += stride.
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-; R0 char *pred_ptr   (prd) read one word at a time, post-incremented by 4
-; R1 short *dif_ptr   (dif) 16 halfwords per row at offsets 0..28
-; R2 char *dst_ptr    (dst) four words per row at offsets 0, 4, 8, 12
-; R3 int stride       (stride) byte step applied to dst after each row
-|vp8_recon4b_armv6| PROC
- stmdb sp!, {r4 - r9, lr}
-
- mov lr, #4 ; lr = rows remaining (lr is already saved on the stack)
-
-recon4b_loop
- ;0, 1, 2, 3
- ldr r4, [prd], #4 ; r4 = pred bytes 3 | 2 | 1 | 0, then prd += 4
- ldr r6, [dif, #0] ; r6 = diff halfwords 1 | 0
- ldr r7, [dif, #4] ; r7 = diff halfwords 3 | 2
-
- pkhbt r8, r6, r7, lsl #16 ; r8 = diff 2 | 0 (even lanes)
- pkhtb r9, r7, r6, asr #16 ; r9 = diff 3 | 1 (odd lanes)
-
- uxtab16 r8, r8, r4 ; diff 2 | 0 + pred bytes 2 | 0 taken from 3 | 2 | 1 | 0
- uxtab16 r9, r9, r4, ror #8 ; diff 3 | 1 + pred bytes 3 | 1 taken from 0 | 3 | 2 | 1
-
- usat16 r8, #8, r8 ; clamp both halfwords to 0..255
- usat16 r9, #8, r9 ; clamp both halfwords to 0..255
- orr r8, r8, r9, lsl #8 ; interleave lanes back into bytes 3 | 2 | 1 | 0
-
- str r8, [dst] ; pixels 0-3 of this row
-
- ;4, 5, 6, 7
- ldr r4, [prd], #4
-;; ldr r6, [dif, #32]
-;; ldr r7, [dif, #36]
- ldr r6, [dif, #8]
- ldr r7, [dif, #12]
-
- pkhbt r8, r6, r7, lsl #16
- pkhtb r9, r7, r6, asr #16
-
- uxtab16 r8, r8, r4
- uxtab16 r9, r9, r4, ror #8
- usat16 r8, #8, r8
- usat16 r9, #8, r9
- orr r8, r8, r9, lsl #8
-
- str r8, [dst, #4] ; pixels 4-7 of this row
-
- ;8, 9, 10, 11
- ldr r4, [prd], #4
-;; ldr r6, [dif, #64]
-;; ldr r7, [dif, #68]
- ldr r6, [dif, #16]
- ldr r7, [dif, #20]
-
- pkhbt r8, r6, r7, lsl #16
- pkhtb r9, r7, r6, asr #16
-
- uxtab16 r8, r8, r4
- uxtab16 r9, r9, r4, ror #8
- usat16 r8, #8, r8
- usat16 r9, #8, r9
- orr r8, r8, r9, lsl #8
-
- str r8, [dst, #8] ; pixels 8-11 of this row
-
- ;12, 13, 14, 15
- ldr r4, [prd], #4
-;; ldr r6, [dif, #96]
-;; ldr r7, [dif, #100]
- ldr r6, [dif, #24]
- ldr r7, [dif, #28]
-
- pkhbt r8, r6, r7, lsl #16
- pkhtb r9, r7, r6, asr #16
-
- uxtab16 r8, r8, r4
- uxtab16 r9, r9, r4, ror #8
- usat16 r8, #8, r8
- usat16 r9, #8, r9
- orr r8, r8, r9, lsl #8
-
- str r8, [dst, #12] ; pixels 12-15 of this row
-
- add dst, dst, stride ; next output row
-;; add dif, dif, #8
- add dif, dif, #32 ; next row of 16 diff halfwords
-
- subs lr, lr, #1 ; one row done; sets Z when none remain
- bne recon4b_loop
-
- ldmia sp!, {r4 - r9, pc} ; restore saved registers; loading pc returns to the caller
-
- ENDP ; |vp8_recon4b_armv6|
-
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-; vp8_recon2b_armv6: 4 rows of 8 pixels. Each pixel = Pred byte + 16-bit Diff,
-; clamped to 0..255. One loop pass handles a row as two words; lr counts the rows.
-; Per row: prd advances 8 bytes (2 loads of 4), dif += 16, dst += stride.
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-; R0 char *pred_ptr   (prd) read one word at a time, post-incremented by 4
-; R1 short *dif_ptr   (dif) 8 halfwords per row at offsets 0..12
-; R2 char *dst_ptr    (dst) two words per row at offsets 0 and 4
-; R3 int stride       (stride) byte step applied to dst after each row
-|vp8_recon2b_armv6| PROC
- stmdb sp!, {r4 - r9, lr}
-
- mov lr, #4 ; lr = rows remaining (lr is already saved on the stack)
-
-recon2b_loop
- ;0, 1, 2, 3
- ldr r4, [prd], #4 ; r4 = four pred bytes, then prd += 4
- ldr r6, [dif, #0] ; r6 = diff halfwords 1 | 0
- ldr r7, [dif, #4] ; r7 = diff halfwords 3 | 2
-
- pkhbt r8, r6, r7, lsl #16 ; r8 = diff 2 | 0 (even lanes)
- pkhtb r9, r7, r6, asr #16 ; r9 = diff 3 | 1 (odd lanes)
-
- uxtab16 r8, r8, r4 ; add pred bytes 2 | 0 to the even lanes
- uxtab16 r9, r9, r4, ror #8 ; add pred bytes 3 | 1 to the odd lanes
- usat16 r8, #8, r8 ; clamp both halfwords to 0..255
- usat16 r9, #8, r9 ; clamp both halfwords to 0..255
- orr r8, r8, r9, lsl #8 ; interleave lanes back into four bytes
-
- str r8, [dst] ; pixels 0-3 of this row
-
- ;4, 5, 6, 7
- ldr r4, [prd], #4
-;; ldr r6, [dif, #32]
-;; ldr r7, [dif, #36]
- ldr r6, [dif, #8]
- ldr r7, [dif, #12]
-
- pkhbt r8, r6, r7, lsl #16
- pkhtb r9, r7, r6, asr #16
-
- uxtab16 r8, r8, r4
- uxtab16 r9, r9, r4, ror #8
- usat16 r8, #8, r8
- usat16 r9, #8, r9
- orr r8, r8, r9, lsl #8
-
- str r8, [dst, #4] ; pixels 4-7 of this row
-
- add dst, dst, stride ; next output row
-;; add dif, dif, #8
- add dif, dif, #16 ; next row of 8 diff halfwords
-
- subs lr, lr, #1 ; one row done; sets Z when none remain
- bne recon2b_loop
-
- ldmia sp!, {r4 - r9, pc} ; restore saved registers; loading pc returns to the caller
-
- ENDP ; |vp8_recon2b_armv6|
-; End of the assembly source.
- END