summaryrefslogtreecommitdiff
path: root/vp8/decoder
diff options
context:
space:
mode:
authorSuman Sunkara <sunkaras@google.com>2010-11-16 15:09:26 -0500
committerSuman Sunkara <sunkaras@google.com>2010-11-16 16:30:59 -0500
commit4b3f72001de952aaf3874d3ce2dca3cb3fbe3928 (patch)
tree754d87a4fa5cae908388f0a26b2affad7cd8a523 /vp8/decoder
parentb9a18344cf8e5283928525c5ac0897ede79f9e57 (diff)
parent00fe7441e9c5cbd898a8382d49cf9396d8482464 (diff)
downloadlibvpx-4b3f72001de952aaf3874d3ce2dca3cb3fbe3928.tar
libvpx-4b3f72001de952aaf3874d3ce2dca3cb3fbe3928.tar.gz
libvpx-4b3f72001de952aaf3874d3ce2dca3cb3fbe3928.tar.bz2
libvpx-4b3f72001de952aaf3874d3ce2dca3cb3fbe3928.zip
Merge branch 'experimental' of ssh://on2-git.corp.google.com:29418/libvpx into test
Conflicts: vp8/common/blockd.h vp8/decoder/decodemv.c vp8/decoder/decodframe.c vp8/decoder/demode.c vp8/decoder/onyxd_if.c vp8/decoder/onyxd_int.h vp8/encoder/encodeframe.c Change-Id: Ic379f4dffaded9796dc19d56be304d3f8527c61f
Diffstat (limited to 'vp8/decoder')
-rw-r--r--vp8/decoder/arm/arm_dsystemdependent.c66
-rw-r--r--vp8/decoder/arm/armv5/dequantize_v5.asm11
-rw-r--r--vp8/decoder/arm/armv6/dboolhuff_v6.asm11
-rw-r--r--vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm218
-rw-r--r--vp8/decoder/arm/armv6/dequant_idct_v6.asm196
-rw-r--r--vp8/decoder/arm/armv6/dequantdcidct_v6.asm202
-rw-r--r--vp8/decoder/arm/armv6/dequantidct_v6.asm183
-rw-r--r--vp8/decoder/arm/armv6/dequantize_v6.asm11
-rw-r--r--vp8/decoder/arm/armv6/idct_blk_v6.c151
-rw-r--r--vp8/decoder/arm/dboolhuff_arm.h16
-rw-r--r--vp8/decoder/arm/dequantize_arm.c15
-rw-r--r--vp8/decoder/arm/dequantize_arm.h63
-rw-r--r--vp8/decoder/arm/detokenize.asm320
-rw-r--r--vp8/decoder/arm/detokenize_arm.h22
-rw-r--r--vp8/decoder/arm/detokenizearm_sjl.c730
-rw-r--r--vp8/decoder/arm/detokenizearm_v6.asm364
-rw-r--r--vp8/decoder/arm/dsystemdependent.c44
-rw-r--r--vp8/decoder/arm/neon/dboolhuff_neon.asm11
-rw-r--r--vp8/decoder/arm/neon/dequant_idct_neon.asm (renamed from vp8/decoder/arm/neon/dequantidct_neon.asm)78
-rw-r--r--vp8/decoder/arm/neon/dequantdcidct_neon.asm133
-rw-r--r--vp8/decoder/arm/neon/dequantizeb_neon.asm11
-rw-r--r--vp8/decoder/arm/neon/idct_blk_neon.c115
-rw-r--r--vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm79
-rw-r--r--vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm69
-rw-r--r--vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm206
-rw-r--r--vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm198
-rw-r--r--vp8/decoder/dboolhuff.c120
-rw-r--r--vp8/decoder/dboolhuff.h155
-rw-r--r--vp8/decoder/decodemv.c678
-rw-r--r--vp8/decoder/decodemv.h11
-rw-r--r--vp8/decoder/decoderthreading.h22
-rw-r--r--vp8/decoder/decodframe.c489
-rw-r--r--vp8/decoder/demode.c160
-rw-r--r--vp8/decoder/demode.h32
-rw-r--r--vp8/decoder/dequantize.c74
-rw-r--r--vp8/decoder/dequantize.h88
-rw-r--r--vp8/decoder/detokenize.c220
-rw-r--r--vp8/decoder/detokenize.h21
-rw-r--r--vp8/decoder/generic/dsystemdependent.c36
-rw-r--r--vp8/decoder/idct_blk.c124
-rw-r--r--vp8/decoder/onyxd_if.c322
-rw-r--r--vp8/decoder/onyxd_if_sjl.c398
-rw-r--r--vp8/decoder/onyxd_int.h65
-rw-r--r--vp8/decoder/reconintra_mt.c982
-rw-r--r--vp8/decoder/reconintra_mt.h26
-rw-r--r--vp8/decoder/threading.c1059
-rw-r--r--vp8/decoder/treereader.h11
-rw-r--r--vp8/decoder/x86/dequantize_mmx.asm155
-rw-r--r--vp8/decoder/x86/dequantize_x86.h54
-rw-r--r--vp8/decoder/x86/idct_blk_mmx.c151
-rw-r--r--vp8/decoder/x86/idct_blk_sse2.c114
-rw-r--r--vp8/decoder/x86/onyxdxv.c11
-rw-r--r--vp8/decoder/x86/x86_dsystemdependent.c31
-rw-r--r--vp8/decoder/xprintf.c163
-rw-r--r--vp8/decoder/xprintf.h32
55 files changed, 5403 insertions, 3924 deletions
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
new file mode 100644
index 000000000..e9741e286
--- /dev/null
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "blockd.h"
+#include "pragmas.h"
+#include "postproc.h"
+#include "dboolhuff.h"
+#include "dequantize.h"
+#include "onyxd_int.h"
+
+void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+ int flags = pbi->common.rtcd.flags;
+ int has_edsp = flags & HAS_EDSP;
+ int has_media = flags & HAS_MEDIA;
+ int has_neon = flags & HAS_NEON;
+
+#if HAVE_ARMV6
+ if (has_media)
+ {
+ pbi->dequant.block = vp8_dequantize_b_v6;
+ pbi->dequant.idct_add = vp8_dequant_idct_add_v6;
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_v6;
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
+#if 0 /*For use with RTCD, when implemented*/
+ pbi->dboolhuff.start = vp8dx_start_decode_c;
+ pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
+ pbi->dboolhuff.debool = vp8dx_decode_bool_c;
+ pbi->dboolhuff.devalue = vp8dx_decode_value_c;
+#endif
+ }
+#endif
+
+#if HAVE_ARMV7
+ if (has_neon)
+ {
+ pbi->dequant.block = vp8_dequantize_b_neon;
+ pbi->dequant.idct_add = vp8_dequant_idct_add_neon;
+ /*This is not used: NEON always dequants two blocks at once.
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_neon;*/
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
+#if 0 /*For use with RTCD, when implemented*/
+ pbi->dboolhuff.start = vp8dx_start_decode_c;
+ pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
+ pbi->dboolhuff.debool = vp8dx_decode_bool_c;
+ pbi->dboolhuff.devalue = vp8dx_decode_value_c;
+#endif
+ }
+#endif
+#endif
+}
diff --git a/vp8/decoder/arm/armv5/dequantize_v5.asm b/vp8/decoder/arm/armv5/dequantize_v5.asm
index eb3f0307c..de3648ae2 100644
--- a/vp8/decoder/arm/armv5/dequantize_v5.asm
+++ b/vp8/decoder/arm/armv5/dequantize_v5.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/armv6/dboolhuff_v6.asm b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
index 143e33e46..6515804bb 100644
--- a/vp8/decoder/arm/armv6/dboolhuff_v6.asm
+++ b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
new file mode 100644
index 000000000..6bebda24f
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
@@ -0,0 +1,218 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dequant_dc_idct_add_v6|
+
+ AREA |.text|, CODE, READONLY
+
+;void vp8_dequant_dc_idct_add_v6(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride, int Dc)
+; r0 = input
+; r1 = dq
+; r2 = pred
+; r3 = dest
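+; sp + 36 = pitch ; sp + 40 after the 4-byte dest spill (sub sp, sp, #4)
+; sp + 40 = stride ; sp + 44 after the dest spill
+; sp + 44 = Dc ; loaded before the dest spill, so sp + 48 is never read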
+
+
+|vp8_dequant_dc_idct_add_v6| PROC
+ stmdb sp!, {r4-r11, lr}
+
+ ldr r6, [sp, #44]
+
+ ldr r4, [r0] ;input
+ ldr r5, [r1], #4 ;dq
+
+ sub sp, sp, #4
+ str r3, [sp]
+
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ mov r12, #3
+
+vp8_dequant_dc_add_loop
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ subs r12, r12, #1
+
+ ldrne r4, [r0, #4]
+ ldrne r5, [r1], #4
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ bne vp8_dequant_dc_add_loop
+
+ sub r0, r0, #32
+ mov r1, r0
+
+; short_idct4x4llm_v6_dual
+ ldr r3, cospi8sqrt2minus1
+ ldr r4, sinpi8sqrt2
+ ldr r6, [r0, #8]
+ mov r5, #2
+vp8_dequant_dc_idct_loop1_v6
+ ldr r12, [r0, #24]
+ ldr r14, [r0, #16]
+ smulwt r9, r3, r6
+ smulwb r7, r3, r6
+ smulwt r10, r4, r6
+ smulwb r8, r4, r6
+ pkhbt r7, r7, r9, lsl #16
+ smulwt r11, r3, r12
+ pkhbt r8, r8, r10, lsl #16
+ uadd16 r6, r6, r7
+ smulwt r7, r4, r12
+ smulwb r9, r3, r12
+ smulwb r10, r4, r12
+ subs r5, r5, #1
+ pkhbt r9, r9, r11, lsl #16
+ ldr r11, [r0], #4
+ pkhbt r10, r10, r7, lsl #16
+ uadd16 r7, r12, r9
+ usub16 r7, r8, r7
+ uadd16 r6, r6, r10
+ uadd16 r10, r11, r14
+ usub16 r8, r11, r14
+ uadd16 r9, r10, r6
+ usub16 r10, r10, r6
+ uadd16 r6, r8, r7
+ usub16 r7, r8, r7
+ str r6, [r1, #8]
+ ldrne r6, [r0, #8]
+ str r7, [r1, #16]
+ str r10, [r1, #24]
+ str r9, [r1], #4
+ bne vp8_dequant_dc_idct_loop1_v6
+
+ mov r5, #2
+ sub r0, r1, #8
+vp8_dequant_dc_idct_loop2_v6
+ ldr r6, [r0], #4
+ ldr r7, [r0], #4
+ ldr r8, [r0], #4
+ ldr r9, [r0], #4
+ smulwt r1, r3, r6
+ smulwt r12, r4, r6
+ smulwt lr, r3, r8
+ smulwt r10, r4, r8
+ pkhbt r11, r8, r6, lsl #16
+ pkhbt r1, lr, r1, lsl #16
+ pkhbt r12, r10, r12, lsl #16
+ pkhtb r6, r6, r8, asr #16
+ uadd16 r6, r1, r6
+ pkhbt lr, r9, r7, lsl #16
+ uadd16 r10, r11, lr
+ usub16 lr, r11, lr
+ pkhtb r8, r7, r9, asr #16
+ subs r5, r5, #1
+ smulwt r1, r3, r8
+ smulwb r7, r3, r8
+ smulwt r11, r4, r8
+ smulwb r9, r4, r8
+ pkhbt r1, r7, r1, lsl #16
+ uadd16 r8, r1, r8
+ pkhbt r11, r9, r11, lsl #16
+ usub16 r1, r12, r8
+ uadd16 r8, r11, r6
+ ldr r9, c0x00040004
+ ldr r12, [sp, #40]
+ uadd16 r6, r10, r8
+ usub16 r7, r10, r8
+ uadd16 r7, r7, r9
+ uadd16 r6, r6, r9
+ uadd16 r10, r14, r1
+ usub16 r1, r14, r1
+ uadd16 r10, r10, r9
+ uadd16 r1, r1, r9
+ ldr r11, [r2], r12
+ mov r8, r7, asr #3
+ pkhtb r9, r8, r10, asr #19
+ mov r8, r1, asr #3
+ pkhtb r8, r8, r6, asr #19
+ uxtb16 lr, r11, ror #8
+ qadd16 r9, r9, lr
+ uxtb16 lr, r11
+ qadd16 r8, r8, lr
+ usat16 r9, #8, r9
+ usat16 r8, #8, r8
+ orr r9, r8, r9, lsl #8
+ ldr r11, [r2], r12
+ ldr lr, [sp]
+ ldr r12, [sp, #44]
+ mov r7, r7, lsl #16
+ mov r1, r1, lsl #16
+ mov r10, r10, lsl #16
+ mov r6, r6, lsl #16
+ mov r7, r7, asr #3
+ pkhtb r7, r7, r10, asr #19
+ mov r1, r1, asr #3
+ pkhtb r1, r1, r6, asr #19
+ uxtb16 r8, r11, ror #8
+ qadd16 r7, r7, r8
+ uxtb16 r8, r11
+ qadd16 r1, r1, r8
+ usat16 r7, #8, r7
+ usat16 r1, #8, r1
+ orr r1, r1, r7, lsl #8
+ str r9, [lr], r12
+ str r1, [lr], r12
+ str lr, [sp]
+ bne vp8_dequant_dc_idct_loop2_v6
+
+; vpx_memset(input, 0, 32): rewind r0 to input and zero all 16 coefficients
+ sub r0, r0, #32
+ add sp, sp, #4
+
+ mov r12, #0
+ str r12, [r0]
+ str r12, [r0, #4]
+ str r12, [r0, #8]
+ str r12, [r0, #12]
+ str r12, [r0, #16]
+ str r12, [r0, #20]
+ str r12, [r0, #24]
+ str r12, [r0, #28]
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ; |vp8_dequant_dc_idct_add_v6|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x00004E7B
+sinpi8sqrt2 DCD 0x00008A8C
+c0x00040004 DCD 0x00040004
+
+ END
diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
new file mode 100644
index 000000000..47b671ca6
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
@@ -0,0 +1,196 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+ EXPORT |vp8_dequant_idct_add_v6|
+
+ AREA |.text|, CODE, READONLY
+;void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride)
+; r0 = input
+; r1 = dq
+; r2 = pred
+; r3 = dest
+; sp + 36 = pitch ; sp + 40 after the 4-byte dest spill (sub sp, sp, #4)
+; sp + 40 = stride ; sp + 44 after the dest spill
+
+
+|vp8_dequant_idct_add_v6| PROC
+ stmdb sp!, {r4-r11, lr}
+
+ ldr r4, [r0] ;input
+ ldr r5, [r1], #4 ;dq
+
+ sub sp, sp, #4
+ str r3, [sp]
+
+ mov r12, #4
+
+vp8_dequant_add_loop
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ subs r12, r12, #1
+
+ ldrne r4, [r0, #4]
+ ldrne r5, [r1], #4
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ bne vp8_dequant_add_loop
+
+ sub r0, r0, #32
+ mov r1, r0
+
+; short_idct4x4llm_v6_dual
+ ldr r3, cospi8sqrt2minus1
+ ldr r4, sinpi8sqrt2
+ ldr r6, [r0, #8]
+ mov r5, #2
+vp8_dequant_idct_loop1_v6
+ ldr r12, [r0, #24]
+ ldr r14, [r0, #16]
+ smulwt r9, r3, r6
+ smulwb r7, r3, r6
+ smulwt r10, r4, r6
+ smulwb r8, r4, r6
+ pkhbt r7, r7, r9, lsl #16
+ smulwt r11, r3, r12
+ pkhbt r8, r8, r10, lsl #16
+ uadd16 r6, r6, r7
+ smulwt r7, r4, r12
+ smulwb r9, r3, r12
+ smulwb r10, r4, r12
+ subs r5, r5, #1
+ pkhbt r9, r9, r11, lsl #16
+ ldr r11, [r0], #4
+ pkhbt r10, r10, r7, lsl #16
+ uadd16 r7, r12, r9
+ usub16 r7, r8, r7
+ uadd16 r6, r6, r10
+ uadd16 r10, r11, r14
+ usub16 r8, r11, r14
+ uadd16 r9, r10, r6
+ usub16 r10, r10, r6
+ uadd16 r6, r8, r7
+ usub16 r7, r8, r7
+ str r6, [r1, #8]
+ ldrne r6, [r0, #8]
+ str r7, [r1, #16]
+ str r10, [r1, #24]
+ str r9, [r1], #4
+ bne vp8_dequant_idct_loop1_v6
+
+ mov r5, #2
+ sub r0, r1, #8
+vp8_dequant_idct_loop2_v6
+ ldr r6, [r0], #4
+ ldr r7, [r0], #4
+ ldr r8, [r0], #4
+ ldr r9, [r0], #4
+ smulwt r1, r3, r6
+ smulwt r12, r4, r6
+ smulwt lr, r3, r8
+ smulwt r10, r4, r8
+ pkhbt r11, r8, r6, lsl #16
+ pkhbt r1, lr, r1, lsl #16
+ pkhbt r12, r10, r12, lsl #16
+ pkhtb r6, r6, r8, asr #16
+ uadd16 r6, r1, r6
+ pkhbt lr, r9, r7, lsl #16
+ uadd16 r10, r11, lr
+ usub16 lr, r11, lr
+ pkhtb r8, r7, r9, asr #16
+ subs r5, r5, #1
+ smulwt r1, r3, r8
+ smulwb r7, r3, r8
+ smulwt r11, r4, r8
+ smulwb r9, r4, r8
+ pkhbt r1, r7, r1, lsl #16
+ uadd16 r8, r1, r8
+ pkhbt r11, r9, r11, lsl #16
+ usub16 r1, r12, r8
+ uadd16 r8, r11, r6
+ ldr r9, c0x00040004
+ ldr r12, [sp, #40]
+ uadd16 r6, r10, r8
+ usub16 r7, r10, r8
+ uadd16 r7, r7, r9
+ uadd16 r6, r6, r9
+ uadd16 r10, r14, r1
+ usub16 r1, r14, r1
+ uadd16 r10, r10, r9
+ uadd16 r1, r1, r9
+ ldr r11, [r2], r12
+ mov r8, r7, asr #3
+ pkhtb r9, r8, r10, asr #19
+ mov r8, r1, asr #3
+ pkhtb r8, r8, r6, asr #19
+ uxtb16 lr, r11, ror #8
+ qadd16 r9, r9, lr
+ uxtb16 lr, r11
+ qadd16 r8, r8, lr
+ usat16 r9, #8, r9
+ usat16 r8, #8, r8
+ orr r9, r8, r9, lsl #8
+ ldr r11, [r2], r12
+ ldr lr, [sp]
+ ldr r12, [sp, #44]
+ mov r7, r7, lsl #16
+ mov r1, r1, lsl #16
+ mov r10, r10, lsl #16
+ mov r6, r6, lsl #16
+ mov r7, r7, asr #3
+ pkhtb r7, r7, r10, asr #19
+ mov r1, r1, asr #3
+ pkhtb r1, r1, r6, asr #19
+ uxtb16 r8, r11, ror #8
+ qadd16 r7, r7, r8
+ uxtb16 r8, r11
+ qadd16 r1, r1, r8
+ usat16 r7, #8, r7
+ usat16 r1, #8, r1
+ orr r1, r1, r7, lsl #8
+ str r9, [lr], r12
+ str r1, [lr], r12
+ str lr, [sp]
+ bne vp8_dequant_idct_loop2_v6
+
+; vpx_memset(input, 0, 32): rewind r0 to input and zero all 16 coefficients
+ sub r0, r0, #32
+ add sp, sp, #4
+
+ mov r12, #0
+ str r12, [r0]
+ str r12, [r0, #4]
+ str r12, [r0, #8]
+ str r12, [r0, #12]
+ str r12, [r0, #16]
+ str r12, [r0, #20]
+ str r12, [r0, #24]
+ str r12, [r0, #28]
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ; |vp8_dequant_idct_add_v6|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x00004E7B
+sinpi8sqrt2 DCD 0x00008A8C
+c0x00040004 DCD 0x00040004
+
+ END
diff --git a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
deleted file mode 100644
index 3daa9b34f..000000000
--- a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
+++ /dev/null
@@ -1,202 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_dc_idct_v6|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA |.text|, CODE, READONLY ; name this block of code
-;void vp8_dequant_dc_idct_v6(short *input, short *dq, short *output, int pitch,int Dc)
-|vp8_dequant_dc_idct_v6| PROC
- stmdb sp!, {r4-r11, lr}
-
- ldr r6, [sp, #36] ;load Dc
-
- ldr r4, [r0] ;input
- ldr r5, [r1], #4 ;dq
-
- sub sp, sp, #4
- str r0, [sp]
-
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- mov r12, #3
-
-dequant_dc_idct_loop
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- subs r12, r12, #1
-
- ldrne r4, [r0, #4]
- ldrne r5, [r1], #4
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- bne dequant_dc_idct_loop
-
- sub r0, r0, #32
- mov r1, r2
- mov r2, r3
-
-; short_idct4x4llm_v6_dual
-
- mov r3, #0x00004E00 ; cos
- orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
- mov r4, #0x00008A00 ; sin
- orr r4, r4, #0x0000008C ; sinpi8sqrt2
- mov r5, #0x2 ; i=2 i
-loop1_dual_11
- ldr r6, [r0, #(4*2)] ; i5 | i4 5|4
- ldr r12, [r0, #(12*2)] ; i13 | i12 13|12
- ldr r14, [r0, #(8*2)] ; i9 | i8 9|8
-
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s
- pkhbt r7, r7, r9, lsl #16 ; 5c | 4c
- smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c
- pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
- uadd16 r6, r6, r7 ; 5c+5 | 4c+4
- smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s
- smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c
- smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s
- subs r5, r5, #0x1 ; i-- --
- pkhbt r9, r9, r11, lsl #16 ; 13c | 12c
- ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0
- pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
- uadd16 r7, r12, r9 ; 13c+13 | 12c+12
- usub16 r7, r8, r7 ; c c
- uadd16 r6, r6, r10 ; d d
- uadd16 r10, r11, r14 ; a a
- usub16 r8, r11, r14 ; b b
- uadd16 r9, r10, r6 ; a+d a+d
- usub16 r10, r10, r6 ; a-d a-d
- uadd16 r6, r8, r7 ; b+c b+c
- usub16 r7, r8, r7 ; b-c b-c
- str r6, [r1, r2] ; o5 | o4
- add r6, r2, r2 ; pitch * 2 p2
- str r7, [r1, r6] ; o9 | o8
- add r6, r6, r2 ; pitch * 3 p3
- str r10, [r1, r6] ; o13 | o12
- str r9, [r1], #0x4 ; o1 | o0 ++
- bne loop1_dual_11 ;
- mov r5, #0x2 ; i=2 i
- sub r0, r1, #8 ; reset input/output i/o
-loop2_dual_22
- ldr r6, [r0, r2] ; i5 | i4 5|4
- ldr r1, [r0] ; i1 | i0 1|0
- ldr r12, [r0, #0x4] ; i3 | i2 3|2
- add r14, r2, #0x4 ; pitch + 2 p+2
- ldr r14, [r0, r14] ; i7 | i6 7|6
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s
- pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4
- pkhbt r7, r9, r7, lsl #16 ; 1c | 5c
- pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 © tc1
- pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5
- uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2
- pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6
- uadd16 r10, r11, r9 ; a a
- usub16 r9, r11, r9 ; b b
- pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7
- subs r5, r5, #0x1 ; i-- --
- smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c
- smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s
- smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c
- smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s
-
- pkhbt r7, r12, r7, lsl #16 ; 3c | 7c
- pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1
- uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2
- usub16 r12, r8, r6 ; c (o1 | o5) c
- uadd16 r6, r11, r1 ; d (o3 | o7) d
- uadd16 r7, r10, r6 ; a+d a+d
- mov r8, #0x4 ; set up 4's 4
- orr r8, r8, #0x40000 ; 4|4
- usub16 r6, r10, r6 ; a-d a-d
- uadd16 r6, r6, r8 ; a-d+4 3|7
- uadd16 r7, r7, r8 ; a+d+4 0|4
- uadd16 r10, r9, r12 ; b+c b+c
- usub16 r1, r9, r12 ; b-c b-c
- uadd16 r10, r10, r8 ; b+c+4 1|5
- uadd16 r1, r1, r8 ; b-c+4 2|6
- mov r8, r10, asr #19 ; o1 >> 3
- strh r8, [r0, #2] ; o1
- mov r8, r1, asr #19 ; o2 >> 3
- strh r8, [r0, #4] ; o2
- mov r8, r6, asr #19 ; o3 >> 3
- strh r8, [r0, #6] ; o3
- mov r8, r7, asr #19 ; o0 >> 3
- strh r8, [r0], r2 ; o0 +p
- sxth r10, r10 ;
- mov r8, r10, asr #3 ; o5 >> 3
- strh r8, [r0, #2] ; o5
- sxth r1, r1 ;
- mov r8, r1, asr #3 ; o6 >> 3
- strh r8, [r0, #4] ; o6
- sxth r6, r6 ;
- mov r8, r6, asr #3 ; o7 >> 3
- strh r8, [r0, #6] ; o7
- sxth r7, r7 ;
- mov r8, r7, asr #3 ; o4 >> 3
- strh r8, [r0], r2 ; o4 +p
-;;;;; subs r5, r5, #0x1 ; i-- --
- bne loop2_dual_22 ;
-
-
-;vpx_memset
- ldr r0, [sp]
- add sp, sp, #4
-
- mov r12, #0
- str r12, [r0]
- str r12, [r0, #4]
- str r12, [r0, #8]
- str r12, [r0, #12]
- str r12, [r0, #16]
- str r12, [r0, #20]
- str r12, [r0, #24]
- str r12, [r0, #28]
-
- ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
-
- ENDP ;|vp8_dequant_dc_idct_v68|
-
- END
diff --git a/vp8/decoder/arm/armv6/dequantidct_v6.asm b/vp8/decoder/arm/armv6/dequantidct_v6.asm
deleted file mode 100644
index 61bb48d04..000000000
--- a/vp8/decoder/arm/armv6/dequantidct_v6.asm
+++ /dev/null
@@ -1,183 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_idct_v6|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA |.text|, CODE, READONLY ; name this block of code
-;void vp8_dequant_idct_v6(short *input, short *dq, short *output, int pitch)
-|vp8_dequant_idct_v6| PROC
- stmdb sp!, {r4-r11, lr}
-
- ldr r4, [r0] ;input
- ldr r5, [r1], #4 ;dq
-
- sub sp, sp, #4
- str r0, [sp]
-
- mov r12, #4
-
-dequant_idct_loop
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- subs r12, r12, #1
-
- ldrne r4, [r0, #4]
- ldrne r5, [r1], #4
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- bne dequant_idct_loop
-
- sub r0, r0, #32
- mov r1, r2
- mov r2, r3
-
-; short_idct4x4llm_v6_dual
-
- mov r3, #0x00004E00 ; cos
- orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
- mov r4, #0x00008A00 ; sin
- orr r4, r4, #0x0000008C ; sinpi8sqrt2
- mov r5, #0x2 ; i=2 i
-loop1_dual_1
- ldr r6, [r0, #(4*2)] ; i5 | i4 5|4
- ldr r12, [r0, #(12*2)] ; i13 | i12 13|12
- ldr r14, [r0, #(8*2)] ; i9 | i8 9|8
-
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s
- pkhbt r7, r7, r9, lsl #16 ; 5c | 4c
- smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c
- pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
- uadd16 r6, r6, r7 ; 5c+5 | 4c+4
- smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s
- smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c
- smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s
- subs r5, r5, #0x1 ; i-- --
- pkhbt r9, r9, r11, lsl #16 ; 13c | 12c
- ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0
- pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
- uadd16 r7, r12, r9 ; 13c+13 | 12c+12
- usub16 r7, r8, r7 ; c c
- uadd16 r6, r6, r10 ; d d
- uadd16 r10, r11, r14 ; a a
- usub16 r8, r11, r14 ; b b
- uadd16 r9, r10, r6 ; a+d a+d
- usub16 r10, r10, r6 ; a-d a-d
- uadd16 r6, r8, r7 ; b+c b+c
- usub16 r7, r8, r7 ; b-c b-c
- str r6, [r1, r2] ; o5 | o4
- add r6, r2, r2 ; pitch * 2 p2
- str r7, [r1, r6] ; o9 | o8
- add r6, r6, r2 ; pitch * 3 p3
- str r10, [r1, r6] ; o13 | o12
- str r9, [r1], #0x4 ; o1 | o0 ++
- bne loop1_dual_1 ;
- mov r5, #0x2 ; i=2 i
- sub r0, r1, #8 ; reset input/output i/o
-loop2_dual_2
- ldr r6, [r0, r2] ; i5 | i4 5|4
- ldr r1, [r0] ; i1 | i0 1|0
- ldr r12, [r0, #0x4] ; i3 | i2 3|2
- add r14, r2, #0x4 ; pitch + 2 p+2
- ldr r14, [r0, r14] ; i7 | i6 7|6
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s
- pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4
- pkhbt r7, r9, r7, lsl #16 ; 1c | 5c
- pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 © tc1
- pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5
- uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2
- pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6
- uadd16 r10, r11, r9 ; a a
- usub16 r9, r11, r9 ; b b
- pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7
- subs r5, r5, #0x1 ; i-- --
- smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c
- smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s
- smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c
- smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s
-
- pkhbt r7, r12, r7, lsl #16 ; 3c | 7c
- pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1
- uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2
- usub16 r12, r8, r6 ; c (o1 | o5) c
- uadd16 r6, r11, r1 ; d (o3 | o7) d
- uadd16 r7, r10, r6 ; a+d a+d
- mov r8, #0x4 ; set up 4's 4
- orr r8, r8, #0x40000 ; 4|4
- usub16 r6, r10, r6 ; a-d a-d
- uadd16 r6, r6, r8 ; a-d+4 3|7
- uadd16 r7, r7, r8 ; a+d+4 0|4
- uadd16 r10, r9, r12 ; b+c b+c
- usub16 r1, r9, r12 ; b-c b-c
- uadd16 r10, r10, r8 ; b+c+4 1|5
- uadd16 r1, r1, r8 ; b-c+4 2|6
- mov r8, r10, asr #19 ; o1 >> 3
- strh r8, [r0, #2] ; o1
- mov r8, r1, asr #19 ; o2 >> 3
- strh r8, [r0, #4] ; o2
- mov r8, r6, asr #19 ; o3 >> 3
- strh r8, [r0, #6] ; o3
- mov r8, r7, asr #19 ; o0 >> 3
- strh r8, [r0], r2 ; o0 +p
- sxth r10, r10 ;
- mov r8, r10, asr #3 ; o5 >> 3
- strh r8, [r0, #2] ; o5
- sxth r1, r1 ;
- mov r8, r1, asr #3 ; o6 >> 3
- strh r8, [r0, #4] ; o6
- sxth r6, r6 ;
- mov r8, r6, asr #3 ; o7 >> 3
- strh r8, [r0, #6] ; o7
- sxth r7, r7 ;
- mov r8, r7, asr #3 ; o4 >> 3
- strh r8, [r0], r2 ; o4 +p
-;;;;; subs r5, r5, #0x1 ; i-- --
- bne loop2_dual_2 ;
- ;
-
-;vpx_memset
- ldr r0, [sp]
- add sp, sp, #4
-
- mov r12, #0
- str r12, [r0]
- str r12, [r0, #4]
- str r12, [r0, #8]
- str r12, [r0, #12]
- str r12, [r0, #16]
- str r12, [r0, #20]
- str r12, [r0, #24]
- str r12, [r0, #28]
-
- ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
-
- ENDP ;|vp8_dequant_idct_v6|
-
- END
diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm
index 95e38594f..72f7e0ee5 100644
--- a/vp8/decoder/arm/armv6/dequantize_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantize_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c
new file mode 100644
index 000000000..3c7bc502f
--- /dev/null
+++ b/vp8/decoder/arm/armv6/idct_blk_v6.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_v6
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs, short *dc)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_dc_idct_add_v6 (q, dq, pre, dst, 16, stride, dc[0]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[0], pre, dst, 16, stride);
+
+ if (eobs[1] > 1)
+ vp8_dequant_dc_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[1], pre+4, dst+4, 16, stride);
+
+ if (eobs[2] > 1)
+ vp8_dequant_dc_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[2], pre+8, dst+8, 16, stride);
+
+ if (eobs[3] > 1)
+ vp8_dequant_dc_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[3], pre+12, dst+12, 16, stride);
+
+ q += 64;
+ dc += 4;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_y_block_v6
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_v6 (q, dq, pre, dst, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dst, 16, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dst+4, 16, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ if (eobs[2] > 1)
+ vp8_dequant_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[32]*dq[0], pre+8, dst+8, 16, stride);
+ ((int *)(q+32))[0] = 0;
+ }
+
+ if (eobs[3] > 1)
+ vp8_dequant_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[48]*dq[0], pre+12, dst+12, 16, stride);
+ ((int *)(q+48))[0] = 0;
+ }
+
+ q += 64;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_uv_block_v6
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 2; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_v6 (q, dq, pre, dstu, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstu, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstu+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ q += 32;
+ pre += 32;
+ dstu += 4*stride;
+ eobs += 2;
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_v6 (q, dq, pre, dstv, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstv, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstv+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ q += 32;
+ pre += 32;
+ dstv += 4*stride;
+ eobs += 2;
+ }
+}
diff --git a/vp8/decoder/arm/dboolhuff_arm.h b/vp8/decoder/arm/dboolhuff_arm.h
index 495004f9c..985951c7c 100644
--- a/vp8/decoder/arm/dboolhuff_arm.h
+++ b/vp8/decoder/arm/dboolhuff_arm.h
@@ -11,14 +11,11 @@
* to be useless. However, it's been left (for now)
* for reference.
*/
-/*
+#if 0
#if HAVE_ARMV6
#undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_v6
-#undef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_v6
-
#undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_v6
@@ -27,15 +24,12 @@
#undef vp8_dbool_devalue
#define vp8_dbool_devalue vp8_decode_value_v6
-#endif // HAVE_ARMV6
+#endif /* HAVE_ARMV6 */
#if HAVE_ARMV7
#undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_neon
-#undef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_neon
-
#undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_neon
@@ -44,6 +38,6 @@
#undef vp8_dbool_devalue
#define vp8_dbool_devalue vp8_decode_value_neon
-#endif // HAVE_ARMV7
-*/
-#endif // DBOOLHUFF_ARM_H
+#endif /* HAVE_ARMV7 */
+#endif
+#endif /* DBOOLHUFF_ARM_H */
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
index 54006a921..b3e14b793 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -29,7 +30,7 @@ void vp8_dequantize_b_neon(BLOCKD *d)
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = &d->dequant[0][0];
+ short *DQC = d->dequant;
vp8_dequantize_b_loop_neon(Q, DQC, DQ);
}
@@ -41,7 +42,7 @@ void vp8_dequantize_b_v6(BLOCKD *d)
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = &d->dequant[0][0];
+ short *DQC = d->dequant;
vp8_dequantize_b_loop_v6(Q, DQC, DQ);
}
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index c8a61a4a7..b7d800d26 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,32 +14,60 @@
#if HAVE_ARMV6
extern prototype_dequant_block(vp8_dequantize_b_v6);
-extern prototype_dequant_idct(vp8_dequant_idct_v6);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_v6);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_v6);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_v6
-#undef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_v6
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_v6
-#undef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_v6
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_v6
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_v6
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
+#endif
#endif
#if HAVE_ARMV7
extern prototype_dequant_block(vp8_dequantize_b_neon);
-extern prototype_dequant_idct(vp8_dequant_idct_neon);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_neon);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_neon);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neon);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_neon
-#undef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_neon
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_neon
-#undef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_neon
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_neon
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_neon
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
+#endif
#endif
#endif
diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm
new file mode 100644
index 000000000..45e068a9f
--- /dev/null
+++ b/vp8/decoder/arm/detokenize.asm
@@ -0,0 +1,320 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_decode_mb_tokens_v6|
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+
+ INCLUDE vpx_asm_offsets.asm
+
+l_qcoeff EQU 0
+l_i EQU 4
+l_type EQU 8
+l_stop EQU 12
+l_c EQU 16
+l_l_ptr EQU 20
+l_a_ptr EQU 24
+l_bc EQU 28
+l_coef_ptr EQU 32
+l_stacksize EQU 64
+
+
+;; constant offsets -- these should be created at build time
+c_block2above_offset EQU 25
+c_entropy_nodes EQU 11
+c_dct_eob_token EQU 11
+
+;------------------------------------------------------------------------
+; vp8_decode_mb_tokens_v6(DETOK *detoken, int type)
+;
+; Decodes the coefficient tokens of one macroblock with the boolean
+; decoder referenced by detoken->current_bc.
+;
+;   r0 = detoken, r1 = type on entry; r0 = 0 on return.
+;
+; Block order: with type == 1 block 24 is decoded first (i = 24), then
+; blocks 0..15 with type 0; otherwise blocks 0..15 are decoded with the
+; given type.  Blocks 16..23 always follow with type 2.
+;
+; For every block the routine stores the coefficients through scan[] into
+; qcoeff, writes eob[i] into the DETOK structure and updates the left and
+; above context bytes.  The decoder state (buffer pointer, count, value,
+; range) is read from the BOOL_DECODER on entry and written back on exit.
+;
+; Long-lived registers:
+;   r4 = value   r5 = count   r6 = range   r8 = bufptr   r9 = detoken
+; r7 holds type at BLOCK_LOOP entry and c inside the coefficient loop;
+; r11 holds i at BLOCK_LOOP entry and is reused as scratch further down.
+;------------------------------------------------------------------------
+|vp8_decode_mb_tokens_v6| PROC
+ stmdb sp!, {r4 - r11, lr}
+ sub sp, sp, #l_stacksize
+ mov r7, r1 ; type
+ mov r9, r0 ; detoken
+
+ ldr r1, [r9, #detok_current_bc]
+ ldr r0, [r9, #detok_qcoeff_start_ptr]
+ mov r11, #0 ; i
+ mov r3, #16 ; stop
+
+ cmp r7, #1 ; type ?= 1
+ addeq r11, r11, #24 ; i = 24
+ addeq r3, r3, #8 ; stop = 24
+ addeq r0, r0, #3, 24 ; qcoeff_ptr += 24*16 shorts (#3 ror 24 = 768 bytes)
+
+ str r0, [sp, #l_qcoeff]
+ str r11, [sp, #l_i]
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+ str r1, [sp, #l_bc]
+
+ add lr, r9, r7, lsl #2 ; detoken + type*4
+
+ ldr r8, [r1, #bool_decoder_user_buffer]
+
+ ldr r10, [lr, #detok_coef_probs]
+ ldr r5, [r1, #bool_decoder_count]
+ ldr r6, [r1, #bool_decoder_range]
+ ldr r4, [r1, #bool_decoder_value]
+
+ str r10, [sp, #l_coef_ptr]
+
+BLOCK_LOOP
+ ldr r3, [r9, #detok_ptr_block2leftabove]
+ ldr r1, [r9, #detok_L]
+ ldr r2, [r9, #detok_A]
+ ldrb r12, [r3, r11]! ; block2left[i]
+ ldrb r3, [r3, #c_block2above_offset]; block2above[i]
+
+ cmp r7, #0 ; c = !type
+ moveq r7, #1
+ movne r7, #0
+
+ ldrb r0, [r1, r12]! ; *(L += block2left[i])
+ ldrb r3, [r2, r3]! ; *(A += block2above[i])
+ mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11
+
+; VP8_COMBINEENTROPYCONTEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) != 0)
+ cmp r0, #0 ; *l ?= 0
+ movne r0, #1
+ cmp r3, #0 ; *a ?= 0
+ addne r0, r0, #1 ; t
+
+ str r1, [sp, #l_l_ptr] ; save l (pointer to the left context byte)
+ str r2, [sp, #l_a_ptr] ; save a (pointer to the above context byte)
+ smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES)
+ mov r1, #0 ; t = 0
+ str r7, [sp, #l_c]
+
+ ;align 4
+COEFF_LOOP
+ ldr r3, [r9, #detok_ptr_coef_bands_x]
+ ldr lr, [r9, #detok_coef_tree_ptr]
+ ;STALL
+ ldrb r3, [r3, r7] ; coef_bands_x[c]
+ ;STALL
+ ;STALL
+ add r0, r0, r3 ; Prob += coef_bands_x[c]
+
+get_token_loop
+ ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1]
+ mov r3, r6, lsl #8 ; range << 8
+ sub r3, r3, #256 ; (range << 8) - (1 << 8)
+ mov r10, #1 ; 1
+
+ smlawb r2, r3, r2, r10 ; split = 1 + (((range-1) * probability) >> 8)
+
+ ldrb r12, [r8] ; load cx data byte in stall slot : r8 = bufptr
+ ;++
+
+ subs r3, r4, r2, lsl #24 ; value-(split<<24): used later to calculate shift for NORMALIZE
+ addhs r1, r1, #1 ; t += 1
+ movhs r4, r3 ; value -= bigsplit (split << 24)
+ subhs r2, r6, r2 ; range -= split
+ ; movlo r6, r2 ; range = split
+
+ ldrsb r1, [lr, r1] ; t = onyx_coef_tree_ptr[t]
+
+; NORMALIZE
+ clz r3, r2 ; vp8dx_bitreader_norm[range] + 24
+ sub r3, r3, #24 ; vp8dx_bitreader_norm[range]
+ subs r5, r5, r3 ; count -= shift
+ mov r6, r2, lsl r3 ; range <<= shift
+ mov r4, r4, lsl r3 ; value <<= shift
+
+; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
+ addle r5, r5, #8 ; count += 8
+ rsble r3, r5, #24 ; 24 - count
+ addle r8, r8, #1 ; bufptr++
+ orrle r4, r4, r12, lsl r3 ; value |= (byte loaded above) << (24 - count)
+
+ cmp r1, #0 ; t ?= 0
+ bgt get_token_loop ; while (t > 0)
+
+ cmn r1, #c_dct_eob_token ; if(t == -DCT_EOB_TOKEN)
+ beq END_OF_BLOCK ; break
+
+ rsb lr, r1, #0 ; v = -t;
+
+ cmp lr, #4 ; if(v > FOUR_TOKEN)
+ ble SKIP_EXTRABITS
+
+ ldr r3, [r9, #detok_teb_base_ptr]
+ mov r11, #1 ; constant 1: the "1 +" in split and the 1 in (1 << bits_count)
+ add r7, r3, lr, lsl #4 ; detok_teb_base_ptr + (v << 4)
+
+ ldrsh lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
+ ldrsh r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
+
+extrabits_loop
+ add r3, r0, r7 ; teb_ptr + bits_count
+
+ ldrb r2, [r3, #4] ; probability; +4 is presumably the offset of Probs[] in TOKENEXTRABITS - TODO confirm / use a generated offset
+ mov r3, r6, lsl #8 ; range << 8
+ sub r3, r3, #256 ; (range << 8) - (1 << 8)
+
+ smlawb r2, r3, r2, r11 ; split = 1 + (((range-1) * probability) >> 8)
+
+ ldrb r12, [r8] ; *bufptr
+ ;++
+
+ subs r10, r4, r2, lsl #24 ; value - (split<<24)
+ movhs r4, r10 ; value = value - (split << 24)
+ subhs r2, r6, r2 ; range = range - split
+ addhs lr, lr, r11, lsl r0 ; v += ((UINT16)1<<bits_count)
+
+; NORMALIZE
+ clz r3, r2 ; leading zeros in the new range
+ sub r3, r3, #24 ; don't count first 3 bytes
+ subs r5, r5, r3 ; count -= shift
+ mov r6, r2, lsl r3 ; range = range << shift
+ mov r4, r4, lsl r3 ; value <<= shift
+
+ addle r5, r5, #8 ; count += BR_COUNT
+ addle r8, r8, #1 ; bufptr++
+ rsble r3, r5, #24 ; 24 - count
+ orrle r4, r4, r12, lsl r3 ; value |= *bufptr << (24 - count)
+
+ subs r0, r0, #1 ; bits_count --
+ bpl extrabits_loop
+
+
+SKIP_EXTRABITS
+ ldr r11, [sp, #l_qcoeff]
+ ldr r0, [sp, #l_coef_ptr] ; Prob = coef_probs
+
+ cmp r1, #0 ; check for nonzero token - if (t)
+ beq SKIP_EOB_CHECK ; if t is zero, we will skip the eob table check
+
+ add r3, r6, #1 ; range + 1
+ mov r2, r3, lsr #1 ; split = (range + 1) >> 1
+
+ subs r3, r4, r2, lsl #24 ; value - (split<<24)
+ movhs r4, r3 ; value -= (split << 24)
+ subhs r2, r6, r2 ; range -= split
+ mvnhs r3, lr ; ~v
+ addhs lr, r3, #1 ; v = (v ^ -1) + 1, i.e. v = -v
+
+; NORMALIZE
+ clz r3, r2 ; leading 0s in the new range
+ sub r3, r3, #24 ; shift
+ subs r5, r5, r3 ; count -= shift
+ mov r6, r2, lsl r3 ; range <<= shift
+ mov r4, r4, lsl r3 ; value <<= shift
+ ldrleb r2, [r8], #1 ; *(bufptr++)
+ addle r5, r5, #8 ; count += 8
+ rsble r3, r5, #24 ; 24 - count
+ orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (24 - count)
+
+ add r0, r0, #11 ; Prob += ENTROPY_NODES (11)
+
+ cmn r1, #1 ; t < -ONE_TOKEN
+
+ addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11)
+
+ mvn r1, #1 ; t = ~1 = -2
+
+SKIP_EOB_CHECK
+ ldr r7, [sp, #l_c] ; c
+ ldr r3, [r9, #detok_scan]
+ add r1, r1, #2 ; t+= 2
+ cmp r7, #15 ; pre-increment c ?< 15; flags are consumed by the blt below
+
+ ldr r3, [r3, +r7, lsl #2] ; scan[c] this needs pre-inc c value
+ add r7, r7, #1 ; c++
+ add r3, r11, r3, lsl #1 ; qcoeff + scan[c]
+
+ str r7, [sp, #l_c] ; store c
+ strh lr, [r3] ; qcoef_ptr[scan[c]] = v
+
+ blt COEFF_LOOP
+
+ sub r7, r7, #1 ; loop ran to c == 16 without an EOB token: --c
+
+END_OF_BLOCK
+ ldr r3, [sp, #l_type] ; type
+ ldr r10, [sp, #l_coef_ptr] ; coef_ptr
+ ldr r0, [sp, #l_qcoeff] ; qcoeff
+ ldr r11, [sp, #l_i] ; i
+ ldr r12, [sp, #l_stop] ; stop
+
+ cmp r3, #0 ; type ?= 0
+ moveq r1, #1
+ movne r1, #0
+ add r3, r11, r9 ; detok + i
+
+ cmp r7, r1 ; c ?= !type
+ strb r7, [r3, #detok_eob] ; eob[i] = c
+
+ ldr r7, [sp, #l_l_ptr] ; l
+ ldr r2, [sp, #l_a_ptr] ; a
+ movne r3, #1 ; t
+ moveq r3, #0
+
+ add r0, r0, #32 ; qcoeff += 16 coefficients (32 bytes)
+ add r11, r11, #1 ; i++
+ strb r3, [r7] ; *l = t
+ strb r3, [r2] ; *a = t
+ str r0, [sp, #l_qcoeff] ; qcoeff
+ str r11, [sp, #l_i] ; i
+
+ cmp r11, r12 ; i < stop
+ ldr r7, [sp, #l_type] ; type
+
+ blt BLOCK_LOOP
+
+ cmp r11, #25 ; i ?= 25
+ bne ln2_decode_mb_to
+
+ ldr r12, [r9, #detok_qcoeff_start_ptr]
+ ldr r10, [r9, #detok_coef_probs]
+ mov r7, #0 ; type = 0
+ mov r11, #0 ; i = 0; BLOCK_LOOP indexes with r11 and does not reload l_i
+ mov r3, #16 ; stop = 16
+ str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr
+ str r7, [sp, #l_i]
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+
+ str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0]
+
+ b BLOCK_LOOP
+
+ln2_decode_mb_to
+ cmp r11, #16 ; i ?= 16
+ bne ln1_decode_mb_to
+
+ mov r10, #detok_coef_probs
+ add r10, r10, #2*4 ; coef_probs[type]
+ ldr r10, [r9, r10] ; detok + detok_coef_probs[type]
+
+ mov r7, #2 ; type = 2
+ mov r3, #24 ; stop = 24
+
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+
+ str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type]
+ b BLOCK_LOOP
+
+ln1_decode_mb_to
+ ldr r2, [sp, #l_bc]
+ mov r0, #0
+ nop
+
+ str r8, [r2, #bool_decoder_user_buffer]
+ str r5, [r2, #bool_decoder_count]
+ str r4, [r2, #bool_decoder_value]
+ str r6, [r2, #bool_decoder_range]
+
+ add sp, sp, #l_stacksize
+ ldmia sp!, {r4 - r11, pc}
+
+ ENDP ; |vp8_decode_mb_tokens_v6|
+
+ END
diff --git a/vp8/decoder/arm/detokenize_arm.h b/vp8/decoder/arm/detokenize_arm.h
new file mode 100644
index 000000000..9bb19b6cf
--- /dev/null
+++ b/vp8/decoder/arm/detokenize_arm.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef DETOKENIZE_ARM_H
+#define DETOKENIZE_ARM_H
+
+/*
+ * Detokenizer entry points for ARMv6. They are declared only when the
+ * build both targets ARMv6 and enables CONFIG_ARM_ASM_DETOK.
+ */
+#if HAVE_ARMV6 && CONFIG_ARM_ASM_DETOK
+void vp8_init_detokenizer(VP8D_COMP *dx);
+void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
+#endif /* HAVE_ARMV6 && CONFIG_ARM_ASM_DETOK */
+
+#endif /* DETOKENIZE_ARM_H */
diff --git a/vp8/decoder/arm/detokenizearm_sjl.c b/vp8/decoder/arm/detokenizearm_sjl.c
deleted file mode 100644
index c714452a6..000000000
--- a/vp8/decoder/arm/detokenizearm_sjl.c
+++ /dev/null
@@ -1,730 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "type_aliases.h"
-#include "blockd.h"
-#include "onyxd_int.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_ports/mem.h"
-
-#define BR_COUNT 8
-#define BOOL_DATA UINT8
-
-#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-//ALIGN16 UINT16 onyx_coef_bands_x[16] = { 0, 1*OCB_X, 2*OCB_X, 3*OCB_X, 6*OCB_X, 4*OCB_X, 5*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 7*OCB_X};
-DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
-
-#define EOB_CONTEXT_NODE 0
-#define ZERO_CONTEXT_NODE 1
-#define ONE_CONTEXT_NODE 2
-#define LOW_VAL_CONTEXT_NODE 3
-#define TWO_CONTEXT_NODE 4
-#define THREE_CONTEXT_NODE 5
-#define HIGH_LOW_CONTEXT_NODE 6
-#define CAT_ONE_CONTEXT_NODE 7
-#define CAT_THREEFOUR_CONTEXT_NODE 8
-#define CAT_THREE_CONTEXT_NODE 9
-#define CAT_FIVE_CONTEXT_NODE 10
-
-
-
-
-DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
-{
- { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN
- { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN
- { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN
- { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN
- { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN
- { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1
- { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2
- { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3
- { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4
- { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY5
- { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, //DCT_VAL_CATEGORY6
- { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) =
-{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context
- 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0 //end of vp8_block2above
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
-{
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- for (i = 0; i < 24; i++)
- {
-
- a = A[ vp8_block2context[i] ] + vp8_block2above[i];
- l = L[ vp8_block2context[i] ] + vp8_block2left[i];
-
- *a = *l = 0;
- }
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- {
- a = A[Y2CONTEXT] + vp8_block2above[24];
- l = L[Y2CONTEXT] + vp8_block2left[24];
- *a = *l = 0;
- }
-
-
-}
-
-#define ONYXBLOCK2CONTEXT_OFFSET 0
-#define ONYXBLOCK2LEFT_OFFSET 25
-#define ONYXBLOCK2ABOVE_OFFSET 50
-
-DECLARE_ALIGNED(16, const static unsigned char, norm[128]) =
-{
- 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-void init_detokenizer(VP8D_COMP *dx)
-{
- const VP8_COMMON *const oc = & dx->common;
- MACROBLOCKD *x = & dx->mb;
-
- dx->detoken.norm_ptr = (unsigned char *)norm;
- dx->detoken.vp8_coef_tree_ptr = (vp8_tree_index *)vp8_coef_tree;
- dx->detoken.ptr_onyxblock2context_leftabove = (UINT8 *)vp8_block2context_leftabove;
- dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x;
- dx->detoken.scan = (int *)vp8_default_zig_zag1d;
- dx->detoken.teb_base_ptr = (TOKENEXTRABITS *)vp8d_token_extra_bits2;
-
- dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
-
- dx->detoken.coef_probs[0] = (unsigned char *)(oc->fc.coef_probs [0] [ 0 ] [0]);
- dx->detoken.coef_probs[1] = (unsigned char *)(oc->fc.coef_probs [1] [ 0 ] [0]);
- dx->detoken.coef_probs[2] = (unsigned char *)(oc->fc.coef_probs [2] [ 0 ] [0]);
- dx->detoken.coef_probs[3] = (unsigned char *)(oc->fc.coef_probs [3] [ 0 ] [0]);
-
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-
-//shift = norm[range]; \
-// shift = norm_ptr[range]; \
-
-#define NORMALIZE \
- /*if(range < 0x80)*/ \
- { \
- shift = detoken->norm_ptr[range]; \
- range <<= shift; \
- value <<= shift; \
- count -= shift; \
- if(count <= 0) \
- { \
- count += BR_COUNT ; \
- value |= (*bufptr) << (BR_COUNT-count); \
- bufptr++; \
- } \
- }
-#if 1
-#define DECODE_AND_APPLYSIGN(value_to_sign) \
- split = (range + 1) >> 1; \
- if ( (value >> 24) < split ) \
- { \
- range = split; \
- v= value_to_sign; \
- } \
- else \
- { \
- range = range-split; \
- value = value-(split<<24); \
- v = -value_to_sign; \
- } \
- range +=range; \
- value +=value; \
- if (!--count) \
- { \
- count = BR_COUNT; \
- value |= *bufptr; \
- bufptr++; \
- }
-
-#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
- { \
- split = 1 + ((( probability*(range-1) ) )>> 8); \
- if ( (value >> 24) < split ) \
- { \
- range = split; \
- NORMALIZE \
- goto branch; \
- } \
- value -= (split<<24); \
- range = range - split; \
- NORMALIZE \
- }
-
-#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
- { \
- split = 1 + ((( probability*(range-1) ) ) >> 8); \
- if ( (value >> 24) < split ) \
- { \
- range = split; \
- NORMALIZE \
- Prob = coef_probs; \
- ++c; \
- Prob += vp8_coef_bands_x[c]; \
- goto branch; \
- } \
- value -= (split<<24); \
- range = range - split; \
- NORMALIZE \
- }
-
-#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
- DECODE_AND_APPLYSIGN(val) \
- Prob = coef_probs + (ENTROPY_NODES*2); \
- if(c < 15){\
- qcoeff_ptr [ scan[c] ] = (INT16) v; \
- ++c; \
- goto DO_WHILE; }\
- qcoeff_ptr [ scan[15] ] = (INT16) v; \
- goto BLOCK_FINISHED;
-
-
-#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
- split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
- if(value >= (split<<24))\
- {\
- range = range-split;\
- value = value-(split<<24);\
- val += ((UINT16)1<<bits_count);\
- }\
- else\
- {\
- range = split;\
- }\
- NORMALIZE
-#endif
-
-#if 0
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
- const VP8_COMMON *const oc = & dx->common;
-
- BOOL_DECODER *bc = x->current_bc;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- int eobtotal = 0;
-
- register int count;
-
- BOOL_DATA *bufptr;
- register unsigned int range;
- register unsigned int value;
- const int *scan;
- register unsigned int shift;
- UINT32 split;
- INT16 *qcoeff_ptr;
-
- UINT8 *coef_probs;
- int type;
- int stop;
- INT16 val, bits_count;
- INT16 c;
- INT16 t;
- INT16 v;
- vp8_prob *Prob;
-
- //int *scan;
- type = 3;
- i = 0;
- stop = 16;
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- {
- i = 24;
- stop = 24;
- type = 1;
- qcoeff_ptr = &x->qcoeff[24*16];
- scan = vp8_default_zig_zag1d;
- eobtotal -= 16;
- }
- else
- {
- scan = vp8_default_zig_zag1d;
- qcoeff_ptr = &x->qcoeff[0];
- }
-
- count = bc->count;
- range = bc->range;
- value = bc->value;
- bufptr = &bc->buffer[bc->pos];
-
-
- coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-
-BLOCK_LOOP:
- a = A[ vp8_block2context[i] ] + vp8_block2above[i];
- l = L[ vp8_block2context[i] ] + vp8_block2left[i];
- c = (INT16)(!type);
-
- VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
- Prob = coef_probs;
- Prob += t * ENTROPY_NODES;
-
-DO_WHILE:
- Prob += vp8_coef_bands_x[c];
- DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
-
-CHECK_0_:
- DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_);
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val;
- bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length;
-
- do
- {
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count);
- bits_count -- ;
- }
- while (bits_count >= 0);
-
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_FIVE_CONTEXT_NODE_0_:
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREEFOUR_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_);
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREE_CONTEXT_NODE_0_:
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-HIGH_LOW_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_);
-
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_ONE_CONTEXT_NODE_0_:
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-LOW_VAL_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
-
-THREE_CONTEXT_NODE_0_:
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
-
-TWO_CONTEXT_NODE_0_:
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
-
-ONE_CONTEXT_NODE_0_:
- DECODE_AND_APPLYSIGN(1);
- Prob = coef_probs + ENTROPY_NODES;
-
- if (c < 15)
- {
- qcoeff_ptr [ scan[c] ] = (INT16) v;
- ++c;
- goto DO_WHILE;
- }
-
- qcoeff_ptr [ scan[15] ] = (INT16) v;
-BLOCK_FINISHED:
- t = ((x->Block[i].eob = c) != !type); // any nonzero data?
- eobtotal += x->Block[i].eob;
- *a = *l = t;
- qcoeff_ptr += 16;
-
- i++;
-
- if (i < stop)
- goto BLOCK_LOOP;
-
- if (i == 25)
- {
- scan = vp8_default_zig_zag1d;//x->scan_order1d;
- type = 0;
- i = 0;
- stop = 16;
- coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
- qcoeff_ptr = &x->qcoeff[0];
- goto BLOCK_LOOP;
- }
-
- if (i == 16)
- {
- type = 2;
- coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
- stop = 24;
- goto BLOCK_LOOP;
- }
-
- bc->count = count;
- bc->value = value;
- bc->range = range;
- bc->pos = bufptr - bc->buffer;
- return eobtotal;
-
-}
-//#endif
-#else
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-#if 0
-//uses relative offsets
-
-const vp8_tree_index vp8_coef_tree_x[ 22] = /* corresponding _CONTEXT_NODEs */
-{
- -DCT_EOB_TOKEN, 1, /* 0 = EOB */
- -ZERO_TOKEN, 1, /* 1 = ZERO */
- -ONE_TOKEN, 1, /* 2 = ONE */
- 2, 5, /* 3 = LOW_VAL */
- -TWO_TOKEN, 1, /* 4 = TWO */
- -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
- 2, 3, /* 6 = HIGH_LOW */
- -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
- 2, 3, /* 8 = CAT_THREEFOUR */
- -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
- -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
-};
-#endif
-
-#define _SCALEDOWN 8 //16 //8
-
-int vp8_decode_mb_tokens_v5(DETOK *detoken, int type);
-
-int vp8_decode_mb_tokens_v5_c(DETOK *detoken, int type)
-{
- BOOL_DECODER *bc = detoken->current_bc;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- register int count;
-
- BOOL_DATA *bufptr;
- register unsigned int range;
- register unsigned int value;
- register unsigned int shift;
- UINT32 split;
- INT16 *qcoeff_ptr;
-
- UINT8 *coef_probs;
-// int type;
- int stop;
- INT16 c;
- INT16 t;
- INT16 v;
- vp8_prob *Prob;
-
-
-
-// type = 3;
- i = 0;
- stop = 16;
- qcoeff_ptr = detoken->qcoeff_start_ptr;
-
-// if( detoken->mode != B_PRED && detoken->mode != SPLITMV)
- if (type == 1)
- {
- i += 24;
- stop += 8; //24;
-// type = 1;
- qcoeff_ptr += 24 * 16;
-// eobtotal-=16;
- }
-
- count = bc->count;
- range = bc->range;
- value = bc->value;
- bufptr = &bc->buffer[bc->pos];
-
-
- coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-
-BLOCK_LOOP:
- a = detoken->A[ detoken->ptr_onyxblock2context_leftabove[i] ];
- l = detoken->L[ detoken->ptr_onyxblock2context_leftabove[i] ];
- c = !type;
- a += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2ABOVE_OFFSET];
- l += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2LEFT_OFFSET];
-
- //#define ONYX_COMBINEENTROPYCONTEXTS( Dest, A, B) \
- //Dest = ((A)!=0) + ((B)!=0);
-
- VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
-
- Prob = coef_probs;
- Prob += t * ENTROPY_NODES;
- t = 0;
-
- do
- {
-
- {
-// onyx_tree_index * onyx_coef_tree_ptr = onyx_coef_tree_x;
-
- Prob += detoken->ptr_onyx_coef_bands_x[c];
-
- GET_TOKEN_START:
-
- do
- {
- split = 1 + (((range - 1) * (Prob[t>>1])) >> 8);
-
- if (value >> 24 >= split)
- {
- range = range - split;
- value = value - (split << 24);
- t += 1;
-
- //used to eliminate else branch
- split = range;
- }
-
- range = split;
-
- t = detoken->vp8_coef_tree_ptr[ t ];
-
- NORMALIZE
-
- }
- while (t > 0) ;
- }
- GET_TOKEN_STOP:
-
- if (t == -DCT_EOB_TOKEN)
- {
- break;
- }
-
- v = -t;
-
- if (v > FOUR_TOKEN)
- {
- INT16 bits_count;
- TOKENEXTRABITS *teb_ptr;
-
-// teb_ptr = &onyxd_token_extra_bits2[t];
-// teb_ptr = &onyxd_token_extra_bits2[v];
- teb_ptr = &detoken->teb_base_ptr[v];
-
-
- v = teb_ptr->min_val;
- bits_count = teb_ptr->Length;
-
- do
- {
- split = 1 + (((range - 1) * teb_ptr->Probs[bits_count]) >> _SCALEDOWN);
-
- if ((value >> 24) >= split)
- {
- range = range - split;
- value = value - (split << 24);
- v += ((UINT16)1 << bits_count);
-
- //used to eliminate else branch
- split = range;
- }
-
- range = split;
-
- NORMALIZE
-
- bits_count -- ;
- }
- while (bits_count >= 0);
- }
-
- Prob = coef_probs;
-
- if (t)
- {
- split = 1 + (((range - 1) * vp8_prob_half) >> 8);
-
- if ((value >> 24) >= split)
- {
- range = range - split;
- value = value - (split << 24);
- v = (v ^ -1) + 1; /* negate w/out conditionals */
-
- //used to eliminate else branch
- split = range;
- }
-
- range = split;
-
- NORMALIZE
- Prob += ENTROPY_NODES;
-
- if (t < -ONE_TOKEN)
- Prob += ENTROPY_NODES;
-
- t = -2;
- }
-
- //if t is zero, we will skip the eob table check
- t += 2;
- qcoeff_ptr [detoken->scan [c] ] = (INT16) v;
-
- }
- while (++c < 16);
-
- if (t != -DCT_EOB_TOKEN)
- {
- --c;
- }
-
- t = ((detoken->eob[i] = c) != !type); // any nonzero data?
-// eobtotal += detoken->eob[i];
- *a = *l = t;
- qcoeff_ptr += 16;
-
- i++;
-
- if (i < stop)
- goto BLOCK_LOOP;
-
- if (i == 25)
- {
- type = 0;
- i = 0;
- stop = 16;
-// coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
- coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
- qcoeff_ptr = detoken->qcoeff_start_ptr;
- goto BLOCK_LOOP;
- }
-
- if (i == 16)
- {
- type = 2;
-// coef_probs =(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
- coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
- stop = 24;
- goto BLOCK_LOOP;
- }
-
- bc->count = count;
- bc->value = value;
- bc->range = range;
- bc->pos = bufptr - bc->buffer;
- return 0;
-}
-//#if 0
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
-// const ONYX_COMMON * const oc = & dx->common;
- int eobtotal = 0;
- int i, type;
- /*
- dx->detoken.norm_ptr = norm;
- dx->detoken.onyx_coef_tree_ptr = onyx_coef_tree;
- dx->detoken.ptr_onyxblock2context_leftabove = ONYXBLOCK2CONTEXT_LEFTABOVE;
- dx->detoken.ptr_onyx_coef_bands_x = onyx_coef_bands_x;
- dx->detoken.scan = default_zig_zag1d;
- dx->detoken.teb_base_ptr = onyxd_token_extra_bits2;
-
- dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
- dx->detoken.A = x->above_context;
- dx->detoken.L = x->left_context;
-
- dx->detoken.coef_probs[0] = (unsigned char *)( oc->fc.coef_probs [0] [ 0 ] [0]);
- dx->detoken.coef_probs[1] = (unsigned char *)( oc->fc.coef_probs [1] [ 0 ] [0]);
- dx->detoken.coef_probs[2] = (unsigned char *)( oc->fc.coef_probs [2] [ 0 ] [0]);
- dx->detoken.coef_probs[3] = (unsigned char *)( oc->fc.coef_probs [3] [ 0 ] [0]);
- */
-
- dx->detoken.current_bc = x->current_bc;
- dx->detoken.A = x->above_context;
- dx->detoken.L = x->left_context;
-
- type = 3;
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- {
- type = 1;
- eobtotal -= 16;
- }
-
- vp8_decode_mb_tokens_v5(&dx->detoken, type);
-
- for (i = 0; i < 25; i++)
- {
- x->Block[i].eob = dx->detoken.eob[i];
- eobtotal += dx->detoken.eob[i];
- }
-
- return eobtotal;
-}
-#endif
diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm
deleted file mode 100644
index 4d87ee5bd..000000000
--- a/vp8/decoder/arm/detokenizearm_v6.asm
+++ /dev/null
@@ -1,364 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_decode_mb_tokens_v5|
-
- AREA |.text|, CODE, READONLY ; name this block of code
-
- INCLUDE vpx_asm_offsets.asm
-
-l_qcoeff EQU 0
-l_i EQU 4
-l_type EQU 8
-l_stop EQU 12
-l_c EQU 16
-l_l_ptr EQU 20
-l_a_ptr EQU 24
-l_bc EQU 28
-l_coef_ptr EQU 32
-l_stacksize EQU 64
-
-
-;; constant offsets -- these should be created at build time
-c_onyxblock2left_offset EQU 25
-c_onyxblock2above_offset EQU 50
-c_entropy_nodes EQU 11
-c_dct_eob_token EQU 11
-
-|vp8_decode_mb_tokens_v5| PROC
- stmdb sp!, {r4 - r11, lr}
- sub sp, sp, #l_stacksize
- mov r7, r1
- mov r9, r0 ;DETOK *detoken
-
- ldr r1, [r9, #detok_current_bc]
- ldr r0, [r9, #detok_qcoeff_start_ptr]
- mov r11, #0
- mov r3, #0x10
-
- cmp r7, #1
- addeq r11, r11, #24
- addeq r3, r3, #8
- addeq r0, r0, #3, 24
-
- str r0, [sp, #l_qcoeff]
- str r11, [sp, #l_i]
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
- str r1, [sp, #l_bc]
-
- add lr, r9, r7, lsl #2
-
- ldr r2, [r1, #bool_decoder_buffer]
- ldr r3, [r1, #bool_decoder_pos]
-
- ldr r10, [lr, #detok_coef_probs]
- ldr r5, [r1, #bool_decoder_count]
- ldr r6, [r1, #bool_decoder_range]
- ldr r4, [r1, #bool_decoder_value]
- add r8, r2, r3
-
- str r10, [sp, #l_coef_ptr]
-
-
- ;align 4
-BLOCK_LOOP
- ldr r3, [r9, #detok_ptr_onyxblock2context_leftabove]
- ldr r2, [r9, #DETOK_A]
- ldr r1, [r9, #DETOK_L]
- ldrb r12, [r3, +r11] ; detoken->ptr_onyxblock2context_leftabove[i]
-
- cmp r7, #0 ; check type
- moveq r7, #1
- movne r7, #0
-
- ldr r0, [r2, +r12, lsl #2] ; a
- add r1, r1, r12, lsl #4
- add r3, r3, r11
-
- ldrb r2, [r3, #c_onyxblock2above_offset]
- ldrb r3, [r3, #c_onyxblock2left_offset]
- mov lr, #c_entropy_nodes
-;; ;++
-
- ldr r2, [r0, +r2, lsl #2]!
- add r3, r1, r3, lsl #2
- str r3, [sp, #l_l_ptr]
- ldr r3, [r3]
-
- cmp r2, #0
- movne r2, #1
- cmp r3, #0
- addne r2, r2, #1
-
- str r0, [sp, #l_a_ptr]
- smlabb r0, r2, lr, r10
- mov r1, #0 ; t = 0
- str r7, [sp, #l_c]
-
- ;align 4
-COEFF_LOOP
- ldr r3, [r9, #detok_ptr_onyx_coef_bands_x]
- ldr lr, [r9, #detok_onyx_coef_tree_ptr]
-
-;;the following two lines are used if onyx_coef_bands_x is UINT16
-;; add r3, r3, r7, lsl #1
-;; ldrh r3, [r3]
-
-;;the following line is used if onyx_coef_bands_x is UINT8
- ldrb r3, [r7, +r3]
-
-
-;; ;++
-;; pld [r8]
- ;++
- add r0, r0, r3
-
- ;align 4
-get_token_loop
- ldrb r2, [r0, +r1, asr #1]
- mov r3, r6, lsl #8
- sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
- mov r10, #1
-
- smlawb r2, r3, r2, r10
- ldrb r12, [r8] ;load cx data byte in stall slot
- ;++
-
- subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
- addhs r1, r1, #1 ;t += 1
- movhs r4, r3 ;update value
- subhs r2, r6, r2 ;range = range - split
- movlo r6, r2
-
-;;; ldrsbhs r1, [r1, +lr]
- ldrsb r1, [r1, +lr]
-
-
-;; use branch for short pipelines ???
-;; cmp r2, #0x80
-;; bcs |$LN22@decode_mb_to|
-
- clz r3, r2
- sub r3, r3, #24
- subs r5, r5, r3
- mov r6, r2, lsl r3
- mov r4, r4, lsl r3
-
-;; use branch for short pipelines ???
-;; bgt |$LN22@decode_mb_to|
-
- addle r5, r5, #8
- rsble r3, r5, #8
- addle r8, r8, #1
- orrle r4, r4, r12, lsl r3
-
-;;|$LN22@decode_mb_to|
-
- cmp r1, #0
- bgt get_token_loop
-
- cmn r1, #c_dct_eob_token ;if(t == -DCT_EOB_TOKEN)
- beq END_OF_BLOCK
-
- rsb lr, r1, #0 ;v = -t;
-
- cmp lr, #4 ;if(v > FOUR_TOKEN)
- ble SKIP_EXTRABITS
-
- ldr r3, [r9, #detok_teb_base_ptr]
- mov r11, #1
- add r7, r3, lr, lsl #4
-
- ldrsh lr, [r7, #tokenextrabits_min_val];v = teb_ptr->min_val
- ldrsh r0, [r7, #tokenextrabits_length];bits_count = teb_ptr->Length
-
-extrabits_loop
- add r3, r0, r7
-
- ldrb r2, [r3, #4]
- mov r3, r6, lsl #8
- sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
- mov r10, #1
-
- smlawb r2, r3, r2, r10
- ldrb r12, [r8]
- ;++
-
- subs r10, r4, r2, lsl #24 ;x = value-(split<<24)
- movhs r4, r10 ;update value
- subhs r2, r6, r2 ;range = range - split
- addhs lr, lr, r11, lsl r0 ;v += ((UINT16)1<<bits_count)
- movlo r6, r2 ;range = split
-
-
-;; use branch for short pipelines ???
-;; cmp r2, #0x80
-;; bcs |$LN10@decode_mb_to|
-
- clz r3, r2
- sub r3, r3, #24
- subs r5, r5, r3
- mov r6, r2, lsl r3 ;range
- mov r4, r4, lsl r3 ;value
-
- addle r5, r5, #8
- addle r8, r8, #1
- rsble r3, r5, #8
- orrle r4, r4, r12, lsl r3
-
-;;|$LN10@decode_mb_to|
- subs r0, r0, #1
- bpl extrabits_loop
-
-
-SKIP_EXTRABITS
- ldr r11, [sp, #l_qcoeff]
- ldr r0, [sp, #l_coef_ptr]
-
- cmp r1, #0 ;check for nonzero token
- beq SKIP_EOB_CHECK ;if t is zero, we will skip the eob table chec
-
- sub r3, r6, #1 ;range - 1
- ;++
- mov r3, r3, lsl #7 ; *= onyx_prob_half (128)
- ;++
- mov r3, r3, lsr #8
- add r2, r3, #1 ;split
-
- subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
- movhs r4, r3 ;update value
- subhs r2, r6, r2 ;range = range - split
- mvnhs r3, lr
- addhs lr, r3, #1 ;v = (v ^ -1) + 1
- movlo r6, r2 ;range = split
-
-;; use branch for short pipelines ???
-;; cmp r2, #0x80
-;; bcs |$LN6@decode_mb_to|
-
- clz r3, r2
- sub r3, r3, #24
- subs r5, r5, r3
- mov r6, r2, lsl r3
- mov r4, r4, lsl r3
- ldrleb r2, [r8], #1
- addle r5, r5, #8
- rsble r3, r5, #8
- orrle r4, r4, r2, lsl r3
-
-;;|$LN6@decode_mb_to|
- add r0, r0, #0xB
-
- cmn r1, #1
-
- addlt r0, r0, #0xB
-
- mvn r1, #1
-
-SKIP_EOB_CHECK
- ldr r7, [sp, #l_c]
- ldr r3, [r9, #detok_scan]
- add r1, r1, #2
- cmp r7, #(0x10 - 1) ;assume one less for now.... increment below
-
- ldr r3, [r3, +r7, lsl #2]
- add r7, r7, #1
- add r3, r11, r3, lsl #1
-
- str r7, [sp, #l_c]
- strh lr, [r3]
-
- blt COEFF_LOOP
-
- sub r7, r7, #1 ;if(t != -DCT_EOB_TOKEN) --c
-
-END_OF_BLOCK
- ldr r3, [sp, #l_type]
- ldr r10, [sp, #l_coef_ptr]
- ldr r0, [sp, #l_qcoeff]
- ldr r11, [sp, #l_i]
- ldr r12, [sp, #l_stop]
-
- cmp r3, #0
- moveq r1, #1
- movne r1, #0
- add r3, r11, r9
-
- cmp r7, r1
- strb r7, [r3, #detok_eob]
-
- ldr r7, [sp, #l_l_ptr]
- ldr r2, [sp, #l_a_ptr]
- movne r3, #1
- moveq r3, #0
-
- add r0, r0, #0x20
- add r11, r11, #1
- str r3, [r7]
- str r3, [r2]
- str r0, [sp, #l_qcoeff]
- str r11, [sp, #l_i]
-
- cmp r11, r12 ;i >= stop ?
- ldr r7, [sp, #l_type]
- mov lr, #0xB
-
- blt BLOCK_LOOP
-
- cmp r11, #0x19
- bne ln2_decode_mb_to
-
- ldr r12, [r9, #detok_qcoeff_start_ptr]
- ldr r10, [r9, #detok_coef_probs]
- mov r7, #0
- mov r3, #0x10
- str r12, [sp, #l_qcoeff]
- str r7, [sp, #l_i]
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
-
- str r10, [sp, #l_coef_ptr]
-
- b BLOCK_LOOP
-
-ln2_decode_mb_to
- cmp r11, #0x10
- bne ln1_decode_mb_to
-
- ldr r10, [r9, #0x30]
-
- mov r7, #2
- mov r3, #0x18
-
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
-
- str r10, [sp, #l_coef_ptr]
- b BLOCK_LOOP
-
-ln1_decode_mb_to
- ldr r2, [sp, #l_bc]
- mov r0, #0
- nop
-
- ldr r3, [r2, #bool_decoder_buffer]
- str r5, [r2, #bool_decoder_count]
- str r4, [r2, #bool_decoder_value]
- sub r3, r8, r3
- str r3, [r2, #bool_decoder_pos]
- str r6, [r2, #bool_decoder_range]
-
- add sp, sp, #l_stacksize
- ldmia sp!, {r4 - r11, pc}
-
- ENDP ; |vp8_decode_mb_tokens_v5|
-
- END
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
deleted file mode 100644
index 455c83a9c..000000000
--- a/vp8/decoder/arm/dsystemdependent.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "blockd.h"
-#include "pragmas.h"
-#include "postproc.h"
-#include "dboolhuff.h"
-#include "dequantize.h"
-#include "onyxd_int.h"
-
-void vp8_dmachine_specific_config(VP8D_COMP *pbi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
- pbi->mb.rtcd = &pbi->common.rtcd;
-#if HAVE_ARMV7
- pbi->dequant.block = vp8_dequantize_b_neon;
- pbi->dequant.idct = vp8_dequant_idct_neon;
- pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon;
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.stop = vp8dx_stop_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
- pbi->dboolhuff.debool = vp8dx_decode_bool_c;
- pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-
-#elif HAVE_ARMV6
- pbi->dequant.block = vp8_dequantize_b_v6;
- pbi->dequant.idct = vp8_dequant_idct_v6;
- pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6;
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.stop = vp8dx_stop_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
- pbi->dboolhuff.debool = vp8dx_decode_bool_c;
- pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-#endif
-#endif
-}
diff --git a/vp8/decoder/arm/neon/dboolhuff_neon.asm b/vp8/decoder/arm/neon/dboolhuff_neon.asm
index 7ec62a3d8..ff3ffda97 100644
--- a/vp8/decoder/arm/neon/dboolhuff_neon.asm
+++ b/vp8/decoder/arm/neon/dboolhuff_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/neon/dequantidct_neon.asm b/vp8/decoder/arm/neon/dequant_idct_neon.asm
index bba4d5dfb..1923be42a 100644
--- a/vp8/decoder/arm/neon/dequantidct_neon.asm
+++ b/vp8/decoder/arm/neon/dequant_idct_neon.asm
@@ -1,29 +1,41 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp8_dequant_idct_neon|
+ EXPORT |vp8_dequant_idct_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch);
+;void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride)
; r0 short *input,
; r1 short *dq,
-; r2 short *output,
-; r3 int pitch,
-|vp8_dequant_idct_neon| PROC
+; r2 unsigned char *pred
+; r3 unsigned char *dest
+; sp int pitch
+; sp+4 int stride
+
+|vp8_dequant_idct_add_neon| PROC
vld1.16 {q3, q4}, [r0]
vld1.16 {q5, q6}, [r1]
+ ldr r1, [sp] ; pitch
+ vld1.32 {d14[0]}, [r2], r1
+ vld1.32 {d14[1]}, [r2], r1
+ vld1.32 {d15[0]}, [r2], r1
+ vld1.32 {d15[1]}, [r2]
+
+ ldr r1, [sp, #4] ; stride
- ldr r12, _didct_coeff_
+ ldr r12, _CONSTANTS_
vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
vmul.i16 q2, q4, q6
@@ -41,14 +53,9 @@
vshr.s16 q3, q3, #1
vshr.s16 q4, q4, #1
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+ vqadd.s16 q3, q3, q2
vqadd.s16 q4, q4, q2
- ;d6 - c1:temp1
- ;d7 - d1:temp2
- ;d8 - d1:temp1
- ;d9 - c1:temp2
-
vqsub.s16 d10, d6, d9 ;c1
vqadd.s16 d11, d7, d8 ;d1
@@ -77,7 +84,7 @@
vshr.s16 q3, q3, #1
vshr.s16 q4, q4, #1
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+ vqadd.s16 q3, q3, q2
vqadd.s16 q4, q4, q2
vqsub.s16 d10, d6, d9 ;c1
@@ -95,34 +102,29 @@
vrshr.s16 d4, d4, #3
vrshr.s16 d5, d5, #3
- add r1, r2, r3
- add r12, r1, r3
- add r0, r12, r3
-
vtrn.32 d2, d4
vtrn.32 d3, d5
vtrn.16 d2, d3
vtrn.16 d4, d5
- vst1.16 {d2}, [r2]
- vst1.16 {d3}, [r1]
- vst1.16 {d4}, [r12]
- vst1.16 {d5}, [r0]
+ vaddw.u8 q1, q1, d14
+ vaddw.u8 q2, q2, d15
- bx lr
+ vqmovun.s16 d0, q1
+ vqmovun.s16 d1, q2
+
+ vst1.32 {d0[0]}, [r3], r1
+ vst1.32 {d0[1]}, [r3], r1
+ vst1.32 {d1[0]}, [r3], r1
+ vst1.32 {d1[1]}, [r3]
- ENDP
+ bx lr
-;-----------------
- AREA didct4x4_dat, DATA, READWRITE ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_didct_coeff_
- DCD didct_coeff
-didct_coeff
- DCD 0x4e7b4e7b, 0x8a8c8a8c
+ ENDP ; |vp8_dequant_idct_add_neon|
-;20091, 20091, 35468, 35468
+; Constant Pool
+_CONSTANTS_ DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b4e7b
+sinpi8sqrt2 DCD 0x8a8c8a8c
END
diff --git a/vp8/decoder/arm/neon/dequantdcidct_neon.asm b/vp8/decoder/arm/neon/dequantdcidct_neon.asm
deleted file mode 100644
index 3392f2c2b..000000000
--- a/vp8/decoder/arm/neon/dequantdcidct_neon.asm
+++ /dev/null
@@ -1,133 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_dc_idct_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc);
-; r0 short *input,
-; r1 short *dq,
-; r2 short *output,
-; r3 int pitch,
-; (stack) int Dc
-|vp8_dequant_dc_idct_neon| PROC
- vld1.16 {q3, q4}, [r0]
- vld1.16 {q5, q6}, [r1]
-
- ldr r1, [sp] ;load Dc from stack
-
- ldr r12, _dcidct_coeff_
-
- vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
- vmul.i16 q2, q4, q6
-
- vmov.16 d2[0], r1
-
-;|short_idct4x4llm_neon| PROC
- vld1.16 {d0}, [r12]
- vswp d3, d4 ;q2(vp[4] vp[12])
-
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
- vqadd.s16 q4, q4, q2
-
- ;d6 - c1:temp1
- ;d7 - d1:temp2
- ;d8 - d1:temp1
- ;d9 - c1:temp2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
-; memset(input, 0, 32) -- 32bytes
- vmov.i16 q14, #0
-
- vswp d3, d4
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vmov q15, q14
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vst1.16 {q14, q15}, [r0]
-
- vrshr.s16 d2, d2, #3
- vrshr.s16 d3, d3, #3
- vrshr.s16 d4, d4, #3
- vrshr.s16 d5, d5, #3
-
- add r1, r2, r3
- add r12, r1, r3
- add r0, r12, r3
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
- vst1.16 {d2}, [r2]
- vst1.16 {d3}, [r1]
- vst1.16 {d4}, [r12]
- vst1.16 {d5}, [r0]
-
- bx lr
-
- ENDP
-
-;-----------------
- AREA dcidct4x4_dat, DATA, READWRITE ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_dcidct_coeff_
- DCD dcidct_coeff
-dcidct_coeff
- DCD 0x4e7b4e7b, 0x8a8c8a8c
-
-;20091, 20091, 35468, 35468
-
- END
diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm
index 1bde94607..c8e0c31f2 100644
--- a/vp8/decoder/arm/neon/dequantizeb_neon.asm
+++ b/vp8/decoder/arm/neon/dequantizeb_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
new file mode 100644
index 000000000..fe4f2e0d4
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+/* Prototypes for the *_neon helper routines called by the functions below.
+ * They are declared here, rather than in a shared header, so that they do
+ * not have to be maintained outside of this scope. */
+void idct_dequant_dc_full_2x_neon
+ (short *input, short *dq, unsigned char *pre, unsigned char *dst,
+ int stride, short *dc);
+void idct_dequant_dc_0_2x_neon
+ (short *dc, unsigned char *pre, unsigned char *dst, int stride);
+void idct_dequant_full_2x_neon
+ (short *q, short *dq, unsigned char *pre, unsigned char *dst,
+ int pitch, int stride);
+void idct_dequant_0_2x_neon
+ (short *q, short dq, unsigned char *pre, int pitch,
+ unsigned char *dst, int stride);
+
+void vp8_dequant_dc_idct_add_y_block_neon
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{   /* 4 passes; each pass consumes 4 eobs, 4 dc values and 64 coefficients. */
+    int i;
+    /* eobs is tested bytewise: a short* cast of a char* may be misaligned. */
+    for (i = 0; i < 4; i++)
+    {
+        if ((eobs[0] | eobs[1]) & 0xfe)     /* some eob in the pair is not 0/1 */
+            idct_dequant_dc_full_2x_neon (q, dq, pre, dst, stride, dc);
+        else                                /* both 0/1: dc-only helper */
+            idct_dequant_dc_0_2x_neon(dc, pre, dst, stride);
+
+        if ((eobs[2] | eobs[3]) & 0xfe)     /* second pair, 8 bytes to the right */
+            idct_dequant_dc_full_2x_neon (q+32, dq, pre+8, dst+8, stride, dc+2);
+        else
+            idct_dequant_dc_0_2x_neon(dc+2, pre+8, dst+8, stride);
+
+        q    += 64;
+        dc   += 4;
+        pre  += 64;
+        dst  += 4*stride;                   /* down four output rows */
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_neon
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{   /* 4 passes; each pass consumes 4 eobs and 64 coefficients. */
+    int i;
+    /* eobs is tested bytewise: a short* cast of a char* may be misaligned. */
+    for (i = 0; i < 4; i++)
+    {
+        if ((eobs[0] | eobs[1]) & 0xfe)     /* some eob in the pair is not 0/1 */
+            idct_dequant_full_2x_neon (q, dq, pre, dst, 16, stride);
+        else                                /* both 0/1: first-coefficient helper */
+            idct_dequant_0_2x_neon (q, dq[0], pre, 16, dst, stride);
+
+        if ((eobs[2] | eobs[3]) & 0xfe)     /* second pair, 8 bytes to the right */
+            idct_dequant_full_2x_neon (q+32, dq, pre+8, dst+8, 16, stride);
+        else
+            idct_dequant_0_2x_neon (q+32, dq[0], pre+8, 16, dst+8, stride);
+
+        q    += 64;
+        pre  += 64;                         /* 4 predictor rows at pitch 16 */
+        dst  += 4*stride;                   /* down four output rows */
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_neon
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{   /* Four helper calls: eob pairs 0,1 go to dstu, pairs 2,3 go to dstv. */
+    if ((eobs[0] | eobs[1]) & 0xfe)         /* bytewise test, no short* cast */
+        idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride);
+    else                                    /* both eobs are 0 or 1 */
+        idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride);
+
+    q    += 32;
+    pre  += 32;                             /* 4 predictor rows at pitch 8 */
+    dstu += 4*stride;
+
+    if ((eobs[2] | eobs[3]) & 0xfe)
+        idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride);
+    else
+        idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride);
+
+    q    += 32;
+    pre  += 32;
+
+    if ((eobs[4] | eobs[5]) & 0xfe)         /* from here on the output is dstv */
+        idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride);
+    else
+        idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride);
+
+    q    += 32;
+    pre  += 32;
+    dstv += 4*stride;
+
+    if ((eobs[6] | eobs[7]) & 0xfe)
+        idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride);
+    else
+        idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride);
+}
diff --git a/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
new file mode 100644
index 000000000..456f8e1d4
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
@@ -0,0 +1,79 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |idct_dequant_0_2x_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_0_2x_neon(short *q, short dq, unsigned char *pre,
+; int pitch, unsigned char *dst, int stride);
+; r0 *q
+; r1 dq
+; r2 *pre
+; r3 pitch
+; sp *dst
+; sp+4 stride
+|idct_dequant_0_2x_neon| PROC
+    add             r12, r2, #4             ; r12 = pre + 4: predictor of the hi block
+    vld1.32         {d2[0]}, [r2], r3       ; lo predictor: 4 rows of 4 bytes, step = pitch
+    vld1.32         {d2[1]}, [r2], r3
+    vld1.32         {d4[0]}, [r2], r3
+    vld1.32         {d4[1]}, [r2]
+    vld1.32         {d16[0]}, [r12], r3     ; hi predictor. q8/q9 (d16-d19) are used
+    vld1.32         {d16[1]}, [r12], r3     ; because they are caller-saved; q4-q7
+    vld1.32         {d18[0]}, [r12], r3     ; (d8-d15) are callee-saved under AAPCS
+    vld1.32         {d18[1]}, [r12]         ; and this routine saves nothing.
+
+    ldrh            r12, [r0]               ; lo q
+    ldrh            r2, [r0, #32]           ; hi q (16 shorts further on)
+    mov             r3, #0
+    strh            r3, [r0]                ; clear the two coefficients just read
+    strh            r3, [r0, #32]
+
+    sxth            r12, r12                ; lo
+    mul             r0, r12, r1             ; q * dq
+    add             r0, r0, #4
+    asr             r0, r0, #3              ; (q * dq + 4) >> 3
+    vdup.16         q0, r0                  ; same value added to every lo pixel
+    sxth            r2, r2                  ; hi
+    mul             r0, r2, r1
+    add             r0, r0, #4
+    asr             r0, r0, #3
+    vdup.16         q3, r0                  ; same value added to every hi pixel
+
+    vaddw.u8        q1, q0, d2              ; lo
+    vaddw.u8        q2, q0, d4
+    vaddw.u8        q8, q3, d16             ; hi
+    vaddw.u8        q9, q3, d18
+
+    ldr             r2, [sp]                ; dst
+    ldr             r3, [sp, #4]            ; stride
+
+    vqmovun.s16     d2, q1                  ; lo: saturate to unsigned 8 bit
+    vqmovun.s16     d4, q2
+    vqmovun.s16     d16, q8                 ; hi
+    vqmovun.s16     d18, q9
+
+    add             r0, r2, #4              ; r0 = dst + 4: hi block output
+    vst1.32         {d2[0]}, [r2], r3       ; lo
+    vst1.32         {d2[1]}, [r2], r3
+    vst1.32         {d4[0]}, [r2], r3
+    vst1.32         {d4[1]}, [r2]
+    vst1.32         {d16[0]}, [r0], r3      ; hi
+    vst1.32         {d16[1]}, [r0], r3
+    vst1.32         {d18[0]}, [r0], r3
+    vst1.32         {d18[1]}, [r0]
+
+    bx              lr
+
+    ENDP           ; |idct_dequant_0_2x_neon|
+ END
diff --git a/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
new file mode 100644
index 000000000..0dc036acb
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
@@ -0,0 +1,69 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |idct_dequant_dc_0_2x_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_dc_0_2x_neon(short *dc, unsigned char *pre,
+; unsigned char *dst, int stride);
+; r0 *dc
+; r1 *pre
+; r2 *dst
+; r3 stride
+|idct_dequant_dc_0_2x_neon| PROC
+    ldr             r0, [r0]                ; one 32-bit load: dc[0] in low half, dc[1] in high half
+    mov             r12, #16                ; predictor rows are 16 bytes apart
+
+    vld1.32         {d2[0]}, [r1], r12      ; lo
+    vld1.32         {d2[1]}, [r1], r12
+    vld1.32         {d4[0]}, [r1], r12
+    vld1.32         {d4[1]}, [r1]
+    sub             r1, r1, #44             ; back 3 rows (48) and right 4 bytes: r1 = pre + 4
+    vld1.32         {d16[0]}, [r1], r12     ; hi. q8/q9 (d16-d19) are caller-saved, so the
+    vld1.32         {d16[1]}, [r1], r12     ; callee-saved d8-d15 (AAPCS) are left untouched
+    vld1.32         {d18[0]}, [r1], r12
+    vld1.32         {d18[1]}, [r1]
+
+    sxth            r1, r0                  ; lo *dc
+    add             r1, r1, #4
+    asr             r1, r1, #3              ; (dc + 4) >> 3
+    vdup.16         q0, r1
+    sxth            r0, r0, ror #16         ; hi *dc
+    add             r0, r0, #4
+    asr             r0, r0, #3
+    vdup.16         q3, r0
+
+    vaddw.u8        q1, q0, d2              ; lo
+    vaddw.u8        q2, q0, d4
+    vaddw.u8        q8, q3, d16             ; hi
+    vaddw.u8        q9, q3, d18
+
+    vqmovun.s16     d2, q1                  ; lo: saturate to unsigned 8 bit
+    vqmovun.s16     d4, q2
+    vqmovun.s16     d16, q8                 ; hi
+    vqmovun.s16     d18, q9
+
+    add             r0, r2, #4              ; r0 = dst + 4: hi block output
+    vst1.32         {d2[0]}, [r2], r3       ; lo
+    vst1.32         {d2[1]}, [r2], r3
+    vst1.32         {d4[0]}, [r2], r3
+    vst1.32         {d4[1]}, [r2]
+    vst1.32         {d16[0]}, [r0], r3      ; hi
+    vst1.32         {d16[1]}, [r0], r3
+    vst1.32         {d18[0]}, [r0], r3
+    vst1.32         {d18[1]}, [r0]
+
+    bx              lr
+
+    ENDP           ;|idct_dequant_dc_0_2x_neon|
+ END
diff --git a/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
new file mode 100644
index 000000000..ad4364adc
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
@@ -0,0 +1,206 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |idct_dequant_dc_full_2x_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_dc_full_2x_neon(short *q, short *dq, unsigned char *pre,
+; unsigned char *dst, int stride, short *dc);
+; r0 *q,
+; r1 *dq,
+; r2 *pre
+; r3 *dst
+; sp stride
+; sp+4 *dc
+|idct_dequant_dc_full_2x_neon| PROC
+    vpush           {d8-d15}                ; q4-q7 are callee-saved (AAPCS); stack args move up 64
+    vld1.16         {q0, q1}, [r1]          ; dq (same l/r)
+    vld1.16         {q2, q3}, [r0]          ; l q
+    mov             r1, #16                 ; pitch
+    add             r0, r0, #32             ; r0 = q + 16 shorts
+    vld1.16         {q4, q5}, [r0]          ; r q
+    add             r12, r2, #4
+    ; interleave the predictors
+    vld1.32         {d28[0]}, [r2], r1      ; l pre
+    vld1.32         {d28[1]}, [r12], r1     ; r pre
+    vld1.32         {d29[0]}, [r2], r1
+    vld1.32         {d29[1]}, [r12], r1
+    vld1.32         {d30[0]}, [r2], r1
+    vld1.32         {d30[1]}, [r12], r1
+    vld1.32         {d31[0]}, [r2]
+    ldr             r1, [sp, #68]           ; dc pointer: 2nd stack arg, above the 64 saved bytes
+    vld1.32         {d31[1]}, [r12]
+    ldr             r2, _CONSTANTS_
+
+    ldrh            r12, [r1], #2           ; lo *dc
+    ldrh            r1, [r1]                ; hi *dc
+
+    ; dequant: q[i] = q[i] * dq[i]
+    vmul.i16        q2, q2, q0
+    vmul.i16        q3, q3, q1
+    vmul.i16        q4, q4, q0
+    vmul.i16        q5, q5, q1
+
+    ; move dc up to neon and overwrite first element
+    vmov.16         d4[0], r12
+    vmov.16         d8[0], r1
+
+    vld1.16         {d0}, [r2]              ; d0[0] = cospi8sqrt2minus1, d0[2] = sinpi8sqrt2
+
+    ; q2: l0r0  q3: l8r8
+    ; q4: l4r4  q5: l12r12
+    vswp            d5, d8
+    vswp            d7, d10
+
+    ; _CONSTANTS_ * 4,12 >> 16
+    ; q6:  4 * sinpi : c1/temp1
+    ; q7: 12 * sinpi : d1/temp2
+    ; q8:  4 * cospi
+    ; q9: 12 * cospi
+    vqdmulh.s16     q6, q4, d0[2]           ; sinpi8sqrt2
+    vqdmulh.s16     q7, q5, d0[2]
+    vqdmulh.s16     q8, q4, d0[0]           ; cospi8sqrt2minus1
+    vqdmulh.s16     q9, q5, d0[0]
+
+    vqadd.s16       q10, q2, q3             ; a1 = 0 + 8
+    vqsub.s16       q11, q2, q3             ; b1 = 0 - 8
+
+    ; vqdmulh only accepts signed values. this was a problem because
+    ; our constant had the high bit set, and was treated as a negative value.
+    ; vqdmulh also doubles the value before it shifts by 16. we need to
+    ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
+    ; so we can shift the constant without losing precision. this avoids
+    ; shift again afterward, but also avoids the sign issue. win win!
+    ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we
+    ; pre-shift it
+    vshr.s16        q8, q8, #1
+    vshr.s16        q9, q9, #1
+
+    ; q4:  4 +  4 * cospi : d1/temp1
+    ; q5: 12 + 12 * cospi : c1/temp2
+    vqadd.s16       q4, q4, q8
+    vqadd.s16       q5, q5, q9
+
+    ; c1 = temp1 - temp2
+    ; d1 = temp1 + temp2
+    vqsub.s16       q2, q6, q5
+    vqadd.s16       q3, q4, q7
+
+    ; [0]: a1+d1
+    ; [1]: b1+c1
+    ; [2]: b1-c1
+    ; [3]: a1-d1
+    vqadd.s16       q4, q10, q3
+    vqadd.s16       q5, q11, q2
+    vqsub.s16       q6, q11, q2
+    vqsub.s16       q7, q10, q3
+
+    ; rotate
+    vtrn.32         q4, q6
+    vtrn.32         q5, q7
+    vtrn.16         q4, q5
+    vtrn.16         q6, q7
+    ; idct loop 2
+    ; q4: l 0, 4, 8,12 r 0, 4, 8,12
+    ; q5: l 1, 5, 9,13 r 1, 5, 9,13
+    ; q6: l 2, 6,10,14 r 2, 6,10,14
+    ; q7: l 3, 7,11,15 r 3, 7,11,15
+
+    ; q8:  1 * sinpi : c1/temp1
+    ; q9:  3 * sinpi : d1/temp2
+    ; q10: 1 * cospi
+    ; q11: 3 * cospi
+    vqdmulh.s16     q8, q5, d0[2]           ; sinpi8sqrt2
+    vqdmulh.s16     q9, q7, d0[2]
+    vqdmulh.s16     q10, q5, d0[0]          ; cospi8sqrt2minus1
+    vqdmulh.s16     q11, q7, d0[0]
+
+    vqadd.s16       q2, q4, q6              ; a1 = 0 + 2
+    vqsub.s16       q3, q4, q6              ; b1 = 0 - 2
+
+    ; see note on shifting above
+    vshr.s16        q10, q10, #1
+    vshr.s16        q11, q11, #1
+
+    ; q10: 1 + 1 * cospi : d1/temp1
+    ; q11: 3 + 3 * cospi : c1/temp2
+    vqadd.s16       q10, q5, q10
+    vqadd.s16       q11, q7, q11
+
+    ; q8: c1 = temp1 - temp2
+    ; q9: d1 = temp1 + temp2
+    vqsub.s16       q8, q8, q11
+    vqadd.s16       q9, q10, q9
+
+    ; a1+d1
+    ; b1+c1
+    ; b1-c1
+    ; a1-d1
+    vqadd.s16       q4, q2, q9
+    vqadd.s16       q5, q3, q8
+    vqsub.s16       q6, q3, q8
+    vqsub.s16       q7, q2, q9
+
+    ; +4 >> 3 (rounding)
+    vrshr.s16       q4, q4, #3
+    vrshr.s16       q5, q5, #3
+    vrshr.s16       q6, q6, #3
+    vrshr.s16       q7, q7, #3
+
+    vtrn.32         q4, q6
+    vtrn.32         q5, q7
+    vtrn.16         q4, q5
+    vtrn.16         q6, q7
+
+    ; adding pre
+    ; input is still packed. pre was read interleaved
+    vaddw.u8        q4, q4, d28
+    vaddw.u8        q5, q5, d29
+    vaddw.u8        q6, q6, d30
+    vaddw.u8        q7, q7, d31
+
+    vmov.i16        q14, #0                 ; predictors in q14/q15 were consumed above
+    vmov            q15, q14
+    vst1.16         {q14, q15}, [r0]        ; write over high input
+    sub             r0, r0, #32
+    vst1.16         {q14, q15}, [r0]        ; write over low input
+
+    ;saturate and narrow
+    vqmovun.s16     d0, q4                  ; row 0: l in d0[0], r in d0[1]
+    vqmovun.s16     d1, q5                  ; row 1
+    vqmovun.s16     d2, q6                  ; row 2
+    vqmovun.s16     d3, q7                  ; row 3
+
+    ldr             r1, [sp, #64]           ; stride: 1st stack arg, above the 64 saved bytes
+    add             r2, r3, #4              ; hi
+    vst1.32         {d0[0]}, [r3], r1       ; lo
+    vst1.32         {d0[1]}, [r2], r1       ; hi
+    vst1.32         {d1[0]}, [r3], r1
+    vst1.32         {d1[1]}, [r2], r1
+    vst1.32         {d2[0]}, [r3], r1
+    vst1.32         {d2[1]}, [r2], r1
+    vst1.32         {d3[0]}, [r3]
+    vst1.32         {d3[1]}, [r2]
+    vpop            {d8-d15}                ; restore the caller's q4-q7
+    bx              lr
+
+    ENDP           ; |idct_dequant_dc_full_2x_neon|
+
+; Constant Pool
+_CONSTANTS_ DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b
+; because the lowest bit in 0x8a8c is 0, we can pre-shift this
+sinpi8sqrt2 DCD 0x4546
+
+ END
diff --git a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
new file mode 100644
index 000000000..85fff11b3
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
@@ -0,0 +1,198 @@
+;
+; Copyright (c) 2010 The Webm project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |idct_dequant_full_2x_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_full_2x_neon(short *q, short *dq, unsigned char *pre,
+; unsigned char *dst, int pitch, int stride);
+; r0 *q,
+; r1 *dq,
+; r2 *pre
+; r3 *dst
+; sp pitch
+; sp+4 stride
+|idct_dequant_full_2x_neon| PROC
+ ; Dequantizes two adjacent 4x4 coefficient blocks (left at q, right at
+ ; q + 32 bytes), runs both passes of the inverse transform on them in
+ ; parallel, adds the predictor rows read from pre, stores the saturated
+ ; 8-bit result to dst, and zeroes the 64 bytes of coefficient input.
+ ;
+ ; q4-q7 (d8-d15) are callee-saved under the AAPCS and are used as scratch
+ ; below, so they are saved here and restored before returning. The push
+ ; moves sp down by 64 bytes, so inside the function the stack arguments
+ ; are read at sp+64 (pitch) and sp+68 (stride).
+ vpush {d8-d15}
+
+ vld1.16 {q0, q1}, [r1] ; dq (same l/r)
+ vld1.16 {q2, q3}, [r0] ; l q
+ ldr r1, [sp, #64] ; pitch (sp+0 at entry, +64 for the saved d8-d15)
+ add r0, r0, #32
+ vld1.16 {q4, q5}, [r0] ; r q
+ add r12, r2, #4
+ ; interleave the predictors
+ vld1.32 {d28[0]}, [r2], r1 ; l pre
+ vld1.32 {d28[1]}, [r12], r1 ; r pre
+ vld1.32 {d29[0]}, [r2], r1
+ vld1.32 {d29[1]}, [r12], r1
+ vld1.32 {d30[0]}, [r2], r1
+ vld1.32 {d30[1]}, [r12], r1
+ vld1.32 {d31[0]}, [r2]
+ vld1.32 {d31[1]}, [r12]
+
+ ldr r2, _CONSTANTS_
+
+ ; dequant: q[i] = q[i] * dq[i]
+ vmul.i16 q2, q2, q0
+ vmul.i16 q3, q3, q1
+ vmul.i16 q4, q4, q0
+ vmul.i16 q5, q5, q1
+
+ ; d0[0] = cospi8sqrt2minus1, d0[2] = sinpi8sqrt2 (each stored as a 32-bit word)
+ vld1.16 {d0}, [r2]
+
+ ; q2: l0r0 q3: l8r8
+ ; q4: l4r4 q5: l12r12
+ vswp d5, d8
+ vswp d7, d10
+
+ ; _CONSTANTS_ * 4,12 >> 16
+ ; q6: 4 * sinpi : c1/temp1
+ ; q7: 12 * sinpi : d1/temp2
+ ; q8: 4 * cospi
+ ; q9: 12 * cospi
+ vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2
+ vqdmulh.s16 q7, q5, d0[2]
+ vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1
+ vqdmulh.s16 q9, q5, d0[0]
+
+ vqadd.s16 q10, q2, q3 ; a1 = 0 + 8
+ vqsub.s16 q11, q2, q3 ; b1 = 0 - 8
+
+ ; vqdmulh only accepts signed values. this was a problem because
+ ; our constant had the high bit set, and was treated as a negative value.
+ ; vqdmulh also doubles the value before it shifts by 16. we need to
+ ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
+ ; so we can shift the constant without losing precision. this avoids
+ ; shift again afterward, but also avoids the sign issue. win win!
+ ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we
+ ; pre-shift it
+ vshr.s16 q8, q8, #1
+ vshr.s16 q9, q9, #1
+
+ ; q4: 4 + 4 * cospi : d1/temp1
+ ; q5: 12 + 12 * cospi : c1/temp2
+ vqadd.s16 q4, q4, q8
+ vqadd.s16 q5, q5, q9
+
+ ; c1 = temp1 - temp2
+ ; d1 = temp1 + temp2
+ vqsub.s16 q2, q6, q5
+ vqadd.s16 q3, q4, q7
+
+ ; [0]: a1+d1
+ ; [1]: b1+c1
+ ; [2]: b1-c1
+ ; [3]: a1-d1
+ vqadd.s16 q4, q10, q3
+ vqadd.s16 q5, q11, q2
+ vqsub.s16 q6, q11, q2
+ vqsub.s16 q7, q10, q3
+
+ ; rotate
+ vtrn.32 q4, q6
+ vtrn.32 q5, q7
+ vtrn.16 q4, q5
+ vtrn.16 q6, q7
+ ; idct loop 2
+ ; q4: l 0, 4, 8,12 r 0, 4, 8,12
+ ; q5: l 1, 5, 9,13 r 1, 5, 9,13
+ ; q6: l 2, 6,10,14 r 2, 6,10,14
+ ; q7: l 3, 7,11,15 r 3, 7,11,15
+
+ ; q8: 1 * sinpi : c1/temp1
+ ; q9: 3 * sinpi : d1/temp2
+ ; q10: 1 * cospi
+ ; q11: 3 * cospi
+ vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2
+ vqdmulh.s16 q9, q7, d0[2]
+ vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1
+ vqdmulh.s16 q11, q7, d0[0]
+
+ vqadd.s16 q2, q4, q6 ; a1 = 0 + 2
+ vqsub.s16 q3, q4, q6 ; b1 = 0 - 2
+
+ ; see note on shifting above
+ vshr.s16 q10, q10, #1
+ vshr.s16 q11, q11, #1
+
+ ; q10: 1 + 1 * cospi : d1/temp1
+ ; q11: 3 + 3 * cospi : c1/temp2
+ vqadd.s16 q10, q5, q10
+ vqadd.s16 q11, q7, q11
+
+ ; q8: c1 = temp1 - temp2
+ ; q9: d1 = temp1 + temp2
+ vqsub.s16 q8, q8, q11
+ vqadd.s16 q9, q10, q9
+
+ ; a1+d1
+ ; b1+c1
+ ; b1-c1
+ ; a1-d1
+ vqadd.s16 q4, q2, q9
+ vqadd.s16 q5, q3, q8
+ vqsub.s16 q6, q3, q8
+ vqsub.s16 q7, q2, q9
+
+ ; +4 >> 3 (rounding)
+ vrshr.s16 q4, q4, #3 ; lo
+ vrshr.s16 q5, q5, #3
+ vrshr.s16 q6, q6, #3 ; hi
+ vrshr.s16 q7, q7, #3
+
+ vtrn.32 q4, q6
+ vtrn.32 q5, q7
+ vtrn.16 q4, q5
+ vtrn.16 q6, q7
+
+ ; adding pre
+ ; input is still packed. pre was read interleaved
+ vaddw.u8 q4, q4, d28
+ vaddw.u8 q5, q5, d29
+ vaddw.u8 q6, q6, d30
+ vaddw.u8 q7, q7, d31
+
+ ; r0 still points at q + 32 here (advanced before the second load above)
+ vmov.i16 q14, #0
+ vmov q15, q14
+ vst1.16 {q14, q15}, [r0] ; write over high input
+ sub r0, r0, #32
+ vst1.16 {q14, q15}, [r0] ; write over low input
+
+ ;saturate and narrow
+ vqmovun.s16 d0, q4 ; lo
+ vqmovun.s16 d1, q5
+ vqmovun.s16 d2, q6 ; hi
+ vqmovun.s16 d3, q7
+
+ ldr r1, [sp, #68] ; stride (sp+4 at entry, +64 for the saved d8-d15)
+ add r2, r3, #4 ; hi
+ vst1.32 {d0[0]}, [r3], r1 ; lo
+ vst1.32 {d0[1]}, [r2], r1 ; hi
+ vst1.32 {d1[0]}, [r3], r1
+ vst1.32 {d1[1]}, [r2], r1
+ vst1.32 {d2[0]}, [r3], r1
+ vst1.32 {d2[1]}, [r2], r1
+ vst1.32 {d3[0]}, [r3]
+ vst1.32 {d3[1]}, [r2]
+
+ ; restore the callee-saved NEON registers pushed at entry
+ vpop {d8-d15}
+ bx lr
+
+ ENDP ; |idct_dequant_full_2x_neon|
+
+; Constant Pool
+_CONSTANTS_ DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b
+; because the lowest bit in 0x8a8c is 0, we can pre-shift this (0x8a8c >> 1 = 0x4546)
+sinpi8sqrt2 DCD 0x4546
+
+ END
diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c
index 442054ed3..57cba16a3 100644
--- a/vp8/decoder/dboolhuff.c
+++ b/vp8/decoder/dboolhuff.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -12,7 +13,7 @@
#include "vpx_ports/mem.h"
#include "vpx_mem/vpx_mem.h"
-DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
+DECLARE_ALIGNED(16, const unsigned char, vp8dx_bitreader_norm[256]) =
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -25,86 +26,41 @@ DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
};
-static void copy_in(BOOL_DECODER *br, unsigned int to_write)
-{
- if (to_write > br->user_buffer_sz)
- to_write = br->user_buffer_sz;
-
- memcpy(br->write_ptr, br->user_buffer, to_write);
- br->user_buffer += to_write;
- br->user_buffer_sz -= to_write;
- br->write_ptr = br_ptr_advance(br->write_ptr, to_write);
-}
-
int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
unsigned int source_sz)
{
- br->lowvalue = 0;
+ br->user_buffer_end = source+source_sz;
+ br->user_buffer = source;
+ br->value = 0;
+ br->count = -8;
br->range = 255;
- br->count = 0;
- br->user_buffer = source;
- br->user_buffer_sz = source_sz;
if (source_sz && !source)
return 1;
- /* Allocate the ring buffer backing store with alignment equal to the
- * buffer size*2 so that a single pointer can be used for wrapping rather
- * than a pointer+offset.
- */
- br->decode_buffer = vpx_memalign(VP8_BOOL_DECODER_SZ * 2,
- VP8_BOOL_DECODER_SZ);
-
- if (!br->decode_buffer)
- return 1;
-
/* Populate the buffer */
- br->read_ptr = br->decode_buffer;
- br->write_ptr = br->decode_buffer;
- copy_in(br, VP8_BOOL_DECODER_SZ);
+ vp8dx_bool_decoder_fill_c(br);
- /* Read the first byte */
- br->value = (*br->read_ptr++) << 8;
return 0;
}
void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
{
- int left, right;
-
- /* Find available room in the buffer */
- left = 0;
- right = br->read_ptr - br->write_ptr;
-
- if (right < 0)
- {
- /* Read pointer is behind the write pointer. We can write from the
- * write pointer to the end of the buffer.
- */
- right = VP8_BOOL_DECODER_SZ - (br->write_ptr - br->decode_buffer);
- left = br->read_ptr - br->decode_buffer;
- }
-
- if (right + left < 128)
- return;
-
- if (right)
- copy_in(br, right);
-
- if (left)
- {
- br->write_ptr = br->decode_buffer;
- copy_in(br, left);
- }
-
-}
-
-
-void vp8dx_stop_decode_c(BOOL_DECODER *bc)
-{
- vpx_free(bc->decode_buffer);
- bc->decode_buffer = 0;
+ const unsigned char *bufptr;
+ const unsigned char *bufend;
+ VP8_BD_VALUE value;
+ int count;
+ bufend = br->user_buffer_end;
+ bufptr = br->user_buffer;
+ value = br->value;
+ count = br->count;
+
+ VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+
+ br->user_buffer = bufptr;
+ br->value = value;
+ br->count = count;
}
#if 0
@@ -119,13 +75,18 @@ void vp8dx_stop_decode_c(BOOL_DECODER *bc)
int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
{
unsigned int bit=0;
+ VP8_BD_VALUE value;
unsigned int split;
- unsigned int bigsplit;
- register unsigned int range = br->range;
- register unsigned int value = br->value;
+ VP8_BD_VALUE bigsplit;
+ int count;
+ unsigned int range;
+
+ value = br->value;
+ count = br->count;
+ range = br->range;
split = 1 + (((range-1) * probability) >> 8);
- bigsplit = (split<<8);
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
range = split;
if(value >= bigsplit)
@@ -143,21 +104,16 @@ int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
}*/
{
- int count = br->count;
register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
- if(count <= 0)
- {
- value |= (*br->read_ptr) << (-count);
- br->read_ptr = br_ptr_advance(br->read_ptr, 1);
- count += 8 ;
- }
- br->count = count;
}
br->value = value;
+ br->count = count;
br->range = range;
+ if (count < 0)
+ vp8dx_bool_decoder_fill_c(br);
return bit;
}
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index 772dbdb2e..c851aa7e5 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -1,60 +1,41 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#ifndef DBOOLHUFF_H
#define DBOOLHUFF_H
+#include <stddef.h>
+#include <limits.h>
#include "vpx_ports/config.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
-/* Size of the bool decoder backing storage
- *
- * This size was chosen to be greater than the worst case encoding of a
- * single macroblock. This was calcluated as follows (python):
- *
- * def max_cost(prob):
- * return max(prob_costs[prob], prob_costs[255-prob]) / 256;
- *
- * tree_nodes_cost = 7 * max_cost(255)
- * extra_bits_cost = sum([max_cost(bit) for bit in extra_bits])
- * sign_bit_cost = max_cost(128)
- * total_cost = tree_nodes_cost + extra_bits_cost + sign_bit_cost
- *
- * where the prob_costs table was taken from the C vp8_prob_cost table in
- * boolhuff.c and the extra_bits table was taken from the 11 extrabits for
- * a category 6 token as defined in vp8d_token_extra_bits2/detokenize.c
- *
- * This equation produced a maximum of 79 bits per coefficient. Scaling up
- * to the macroblock level:
- *
- * 79 bits/coeff * 16 coeff/block * 25 blocks/macroblock = 31600 b/mb
- *
- * 4096 bytes = 32768 bits > 31600
- */
-#define VP8_BOOL_DECODER_SZ 4096
-#define VP8_BOOL_DECODER_MASK (VP8_BOOL_DECODER_SZ-1)
-#define VP8_BOOL_DECODER_PTR_MASK (~(uintptr_t)(VP8_BOOL_DECODER_SZ))
+typedef size_t VP8_BD_VALUE;
+
+# define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)
+/*This is meant to be a large, positive constant that can still be efficiently
+ loaded as an immediate (on platforms like ARM, for example).
+ Even relatively modest values like 100 would work fine.*/
+# define VP8_LOTS_OF_BITS (0x40000000)
+
+
struct vp8_dboolhuff_rtcd_vtable;
typedef struct
{
- unsigned int lowvalue;
- unsigned int range;
- unsigned int value;
- int count;
+ const unsigned char *user_buffer_end;
const unsigned char *user_buffer;
- unsigned int user_buffer_sz;
- unsigned char *decode_buffer;
- const unsigned char *read_ptr;
- unsigned char *write_ptr;
+ VP8_BD_VALUE value;
+ int count;
+ unsigned int range;
#if CONFIG_RUNTIME_CPU_DETECT
struct vp8_dboolhuff_rtcd_vtable *rtcd;
#endif
@@ -62,10 +43,9 @@ typedef struct
#define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
const unsigned char *source, unsigned int source_sz)
-#define prototype_dbool_stop(sym) void sym(BOOL_DECODER *bc)
#define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
#define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
-#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits);
+#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits)
#if ARCH_ARM
#include "arm/dboolhuff_arm.h"
@@ -75,10 +55,6 @@ typedef struct
#define vp8_dbool_start vp8dx_start_decode_c
#endif
-#ifndef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_c
-#endif
-
#ifndef vp8_dbool_fill
#define vp8_dbool_fill vp8dx_bool_decoder_fill_c
#endif
@@ -92,48 +68,35 @@ typedef struct
#endif
extern prototype_dbool_start(vp8_dbool_start);
-extern prototype_dbool_stop(vp8_dbool_stop);
extern prototype_dbool_fill(vp8_dbool_fill);
extern prototype_dbool_debool(vp8_dbool_debool);
extern prototype_dbool_devalue(vp8_dbool_devalue);
typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
-typedef prototype_dbool_stop((*vp8_dbool_stop_fn_t));
typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
typedef struct vp8_dboolhuff_rtcd_vtable {
vp8_dbool_start_fn_t start;
- vp8_dbool_stop_fn_t stop;
vp8_dbool_fill_fn_t fill;
vp8_dbool_debool_fn_t debool;
vp8_dbool_devalue_fn_t devalue;
} vp8_dboolhuff_rtcd_vtable_t;
-// There are no processor-specific versions of these
-// functions right now. Disable RTCD to avoid using
-// function pointers which gives a speed boost
-//#ifdef ENABLE_RUNTIME_CPU_DETECT
-//#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
-//#define IF_RTCD(x) (x)
-//#else
+/* There are no processor-specific versions of these
+ * functions right now. Disable RTCD to avoid using
+ * function pointers which gives a speed boost
+ */
+/*#ifdef ENABLE_RUNTIME_CPU_DETECT
+#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
+#define IF_RTCD(x) (x)
+#else*/
#define DBOOLHUFF_INVOKE(ctx,fn) vp8_dbool_##fn
#define IF_RTCD(x) NULL
-//#endif
-
-static unsigned char *br_ptr_advance(const unsigned char *_ptr,
- unsigned int n)
-{
- uintptr_t ptr = (uintptr_t)_ptr;
-
- ptr += n;
- ptr &= VP8_BOOL_DECODER_PTR_MASK;
-
- return (void *)ptr;
-}
+/*#endif*/
-DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
+DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
/* wrapper functions to hide RTCD. static means inline means hopefully no
* penalty
@@ -146,12 +109,34 @@ static int vp8dx_start_decode(BOOL_DECODER *br,
#endif
return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
}
-static void vp8dx_stop_decode(BOOL_DECODER *br) {
- DBOOLHUFF_INVOKE(br->rtcd, stop)(br);
-}
static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
}
+
+/*The refill loop is used in several places, so define it in a macro to make
+ sure they're all consistent.
+ An inline function would be cleaner, but has a significant penalty, because
+ multiple BOOL_DECODER fields must be modified, and the compiler is not smart
+ enough to eliminate the stores to those fields and the subsequent reloads
+ from them when inlining the function.
+
+ Operation: shift starts at VP8_BD_VALUE_SIZE - 8 - (_count + 8). While shift
+ is non-negative, one byte is read from *_bufptr (which is then advanced),
+ OR-ed into _value at bit position shift, _count is increased by 8 and shift
+ is decreased by 8.
+ If _bufptr has reached _bufend, no byte is read; _count is set to
+ VP8_LOTS_OF_BITS and the loop stops.
+
+ _count, _value and _bufptr are assigned to, so they must be modifiable
+ lvalues; _bufend is only read. Every argument is expanded more than once,
+ so pass plain variables without side effects. The macro declares a local
+ named shift, so do not pass an expression that refers to a caller variable
+ of that name.*/
+#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
+ do \
+ { \
+ int shift; \
+ for(shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); shift >= 0; ) \
+ { \
+ if((_bufptr) >= (_bufend)) { \
+ (_count) = VP8_LOTS_OF_BITS; \
+ break; \
+ } \
+ (_count) += 8; \
+ (_value) |= (VP8_BD_VALUE)*(_bufptr)++ << shift; \
+ shift -= 8; \
+ } \
+ } \
+ while(0)
+
+
static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
/*
* Until optimized versions of this function are available, we
@@ -160,13 +145,18 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
*return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability);
*/
unsigned int bit = 0;
+ VP8_BD_VALUE value;
unsigned int split;
- unsigned int bigsplit;
- register unsigned int range = br->range;
- register unsigned int value = br->value;
+ VP8_BD_VALUE bigsplit;
+ int count;
+ unsigned int range;
+
+ value = br->value;
+ count = br->count;
+ range = br->range;
split = 1 + (((range - 1) * probability) >> 8);
- bigsplit = (split << 8);
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
range = split;
@@ -185,23 +175,16 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
}*/
{
- int count = br->count;
register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
-
- if (count <= 0)
- {
- value |= (*br->read_ptr) << (-count);
- br->read_ptr = br_ptr_advance(br->read_ptr, 1);
- count += 8 ;
- }
-
- br->count = count;
}
br->value = value;
+ br->count = count;
br->range = range;
+ if(count < 0)
+ vp8dx_bool_decoder_fill(br);
return bit;
}
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index f151ef3cc..415392b68 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,10 +14,127 @@
#include "entropymode.h"
#include "onyxd_int.h"
#include "findnearmv.h"
-#include "demode.h"
+
#if CONFIG_DEBUG
#include <assert.h>
#endif
+/* Decode one value from bc by walking vp8_bmode_tree with the node
+ * probabilities in p, and return the decoded tree value.
+ */
+static int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
+{
+    return vp8_treed_read(bc, vp8_bmode_tree, p);
+}
+
+
+/* Decode one value from bc by walking vp8_ymode_tree with the node
+ * probabilities in p, and return the decoded tree value.
+ */
+static int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p)
+{
+    return vp8_treed_read(bc, vp8_ymode_tree, p);
+}
+
+/* Decode one value from bc by walking vp8_kf_ymode_tree with the node
+ * probabilities in p, and return the decoded tree value.
+ */
+static int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p)
+{
+    return vp8_treed_read(bc, vp8_kf_ymode_tree, p);
+}
+
+
+
+/* Decode one value from bc by walking vp8_uv_mode_tree with the node
+ * probabilities in p, and return the decoded tree value.
+ */
+static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
+{
+    return vp8_treed_read(bc, vp8_uv_mode_tree, p);
+}
+
+/* Read the segment id of one macroblock from r into mi->segment_id.
+ *
+ * Nothing is read, and mi is left untouched, unless segmentation is enabled
+ * in x and the segmentation map is being updated. The id (0..3) is coded
+ * with two reads: mb_segment_tree_probs[0] selects the upper or lower pair,
+ * then mb_segment_tree_probs[2] or [1] selects the member of that pair.
+ */
+static void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
+{
+    if (!x->segmentation_enabled || !x->update_mb_segmentation_map)
+        return;
+
+    if (vp8_read(r, x->mb_segment_tree_probs[0]))
+        mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
+    else
+        mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
+}
+
+/* Read the key-frame mode information of one macroblock into m.
+ *
+ * Bitstream read order (must not change): optional segment id, optional
+ * skip flag, luma mode, the 16 sub-block modes when the luma mode is B_PRED,
+ * and finally the chroma mode. For any other luma mode the 16 sub-block
+ * entries are filled with the corresponding B_* mode without reading.
+ *
+ * mb_row and mb_col are accepted but not used by this function.
+ */
+static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_col)
+{
+    vp8_reader *const bc = &pbi->bc;
+    const int mis = pbi->common.mode_info_stride;
+    MB_PREDICTION_MODE y_mode;
+    int i;
+
+    /* Every MB starts in segment 0; the id is only overwritten when the
+     * segmentation map is explicitly updated for this frame.
+     */
+    m->mbmi.segment_id = 0;
+
+    if (pbi->mb.update_mb_segmentation_map)
+        vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
+
+    /* The coefficient skip flag is only coded when the feature is in use. */
+    if (!pbi->common.mb_no_coeff_skip)
+        m->mbmi.mb_skip_coeff = 0;
+    else
+        m->mbmi.mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
+
+    y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, pbi->common.kf_ymode_prob);
+
+    m->mbmi.ref_frame = INTRA_FRAME;
+    m->mbmi.mode = y_mode;
+
+    if (m->mbmi.mode == B_PRED)
+    {
+        /* Each sub-block mode is coded with probabilities selected by the
+         * modes of the blocks above and to the left of it.
+         */
+        for (i = 0; i < 16; i++)
+        {
+            const B_PREDICTION_MODE above = vp8_above_bmi(m, i, mis)->mode;
+            const B_PREDICTION_MODE left = vp8_left_bmi(m, i)->mode;
+
+            m->bmi[i].mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, pbi->common.kf_bmode_prob [above] [left]);
+        }
+    }
+    else
+    {
+        /* Map the whole-MB mode onto its sub-block equivalent; DC_PRED and
+         * any unlisted mode map to B_DC_PRED.
+         */
+        int fill_mode = B_DC_PRED;
+
+        switch (y_mode)
+        {
+        case V_PRED:
+            fill_mode = B_VE_PRED;
+            break;
+        case H_PRED:
+            fill_mode = B_HE_PRED;
+            break;
+        case TM_PRED:
+            fill_mode = B_TM_PRED;
+            break;
+        default:
+            break;
+        }
+
+        for (i = 0; i < 16; i++)
+            m->bmi[i].mode = (B_PREDICTION_MODE)fill_mode;
+    }
+
+    m->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.kf_uv_mode_prob);
+}
static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
{
@@ -98,6 +216,8 @@ static MB_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
return (MB_PREDICTION_MODE)i;
}
+
+#ifdef VPX_MODE_COUNT
unsigned int vp8_mv_cont_count[5][4] =
{
{ 0, 0, 0, 0 },
@@ -106,87 +226,108 @@ unsigned int vp8_mv_cont_count[5][4] =
{ 0, 0, 0, 0 },
{ 0, 0, 0, 0 }
};
+#endif
-void vp8_decode_mode_mvs(VP8D_COMP *pbi)
-{
- const MV Zero = { 0, 0};
+/* One row per split type, 16 entries per row; unused trailing entries are 0.
+ * NOTE(review): the values look like the index of the first 4x4 block (raster
+ * order within the macroblock) of each partition: {0, 8} for two 16x8 halves,
+ * {0, 2} for two 8x16 halves, {0, 2, 8, 10} for four 8x8 quarters and 0..15
+ * for sixteen 4x4 blocks. Confirm against vp8_mbsplits and the callers.
+ */
+unsigned char vp8_mbsplit_offset[4][16] = {
+ { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+};
- VP8_COMMON *const pc = & pbi->common;
- vp8_reader *const bc = & pbi->bc;
- MACROBLOCKD *xd = &pbi->mb;
- MODE_INFO *mi = pc->mi, *ms;
- const int mis = pc->mode_info_stride;
+/* One entry per split type.
+ * NOTE(review): presumably the number of 4x4 blocks covered by each partition
+ * of that split type (8, 8, 4, 1) - confirm against the callers.
+ */
+unsigned char vp8_mbsplit_fill_count[4] = {8, 8, 4, 1};
+/* One row per split type; each row is a permutation of the block indices
+ * 0..15.
+ * NOTE(review): the rows look like the 4x4 block indices grouped partition by
+ * partition, so that consecutive runs of vp8_mbsplit_fill_count[s] entries
+ * list the blocks belonging to one partition - confirm against the callers.
+ */
+unsigned char vp8_mbsplit_fill_offset[4][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15},
+ { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+};
- MV_CONTEXT *const mvc = pc->fc.mvc;
- int mb_row = -1;
+
+void vp8_mb_mode_mv_init(VP8D_COMP *pbi)
+{
+ vp8_reader *const bc = & pbi->bc;
+ MV_CONTEXT *const mvc = pbi->common.fc.mvc;
#if CONFIG_SEGMENTATION
- int left_id, above_id;
- int i;
- int sum;
- int index = 0;
+ MACROBLOCKD *const xd = & pbi->mb;
#endif
- vp8_prob prob_intra;
- vp8_prob prob_last;
- vp8_prob prob_gf;
- vp8_prob prob_skip_false = 0;
-
- if (pc->mb_no_coeff_skip)
- prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8);
-
- prob_intra = (vp8_prob)vp8_read_literal(bc, 8);
- prob_last = (vp8_prob)vp8_read_literal(bc, 8);
- prob_gf = (vp8_prob)vp8_read_literal(bc, 8);
- ms = pc->mi - 1;
+ pbi->prob_skip_false = 0;
+ if (pbi->common.mb_no_coeff_skip)
+ pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8);
- if (vp8_read_bit(bc))
+ if(pbi->common.frame_type != KEY_FRAME)
{
- int i = 0;
+ pbi->prob_intra = (vp8_prob)vp8_read_literal(bc, 8);
+ pbi->prob_last = (vp8_prob)vp8_read_literal(bc, 8);
+ pbi->prob_gf = (vp8_prob)vp8_read_literal(bc, 8);
- do
+ if (vp8_read_bit(bc))
{
- pc->fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
- }
- while (++i < 4);
- }
+ int i = 0;
- if (vp8_read_bit(bc))
- {
- int i = 0;
+ do
+ {
+ pbi->common.fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+ }
+ while (++i < 4);
+ }
- do
+ if (vp8_read_bit(bc))
{
- pc->fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+ int i = 0;
+
+ do
+ {
+ pbi->common.fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+ }
+ while (++i < 3);
}
- while (++i < 3);
- }
- read_mvcontexts(bc, mvc);
+ read_mvcontexts(bc, mvc);
#if CONFIG_SEGMENTATION
xd->temporal_update = vp8_read_bit(bc);
#endif
- while (++mb_row < pc->mb_rows)
- {
- int mb_col = -1;
+ }
+}
- while (++mb_col < pc->mb_cols)
- {
- MB_MODE_INFO *const mbmi = & mi->mbmi;
- MV *const mv = & mbmi->mv.as_mv;
- VP8_COMMON *const pc = &pbi->common;
- // MACROBLOCKD *xd = &pbi->mb;
-
- vp8dx_bool_decoder_fill(bc);
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
- xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-
- // If required read in new segmentation data for this MB
- if (pbi->mb.update_mb_segmentation_map)
+void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
+ int mb_row, int mb_col)
+{
+ const MV Zero = { 0, 0};
+ vp8_reader *const bc = & pbi->bc;
+ MV_CONTEXT *const mvc = pbi->common.fc.mvc;
+ const int mis = pbi->common.mode_info_stride;
+#if CONFIG_SEGMENTATION
+ MACROBLOCKD *const xd = & pbi->mb;
+ int sum;
+ int index = mb_row * pbi->common.mb_cols + mb_col;
+#endif
+ MV *const mv = & mbmi->mv.as_mv;
+ int mb_to_left_edge;
+ int mb_to_right_edge;
+ int mb_to_top_edge;
+ int mb_to_bottom_edge;
+
+ mb_to_top_edge = pbi->mb.mb_to_top_edge;
+ mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge;
+ mb_to_top_edge -= LEFT_TOP_MARGIN;
+ mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+ mbmi->need_to_clamp_mvs = 0;
+ /* Distance of the MB to the various image edges.
+ * These are specified in 1/8th pel units, as they are always compared to MV values that are in 1/8th pel units
+ */
+ pbi->mb.mb_to_left_edge =
+ mb_to_left_edge = -((mb_col * 16) << 3);
+ mb_to_left_edge -= LEFT_TOP_MARGIN;
+
+ pbi->mb.mb_to_right_edge =
+ mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3;
+ mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
+
+ /* If required read in new segmentation data for this MB */
+ if (pbi->mb.update_mb_segmentation_map)
{
#if CONFIG_SEGMENTATION
if (xd->temporal_update)
@@ -196,7 +337,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
if (mb_col != 0)
sum += (mi-1)->mbmi.segment_flag;
if (mb_row != 0)
- sum += (mi-pc->mb_cols)->mbmi.segment_flag;
+ sum += (mi-pbi->common.mb_cols)->mbmi.segment_flag;
if (vp8_read(bc, xd->mb_segment_tree_probs[3+sum]) == 0)
{
@@ -223,236 +364,237 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
}
- // Read the macroblock coeff skip flag if this feature is in use, else default to 0
- if (pc->mb_no_coeff_skip)
- mbmi->mb_skip_coeff = vp8_read(bc, prob_skip_false);
- else
- mbmi->mb_skip_coeff = 0;
-
- mbmi->uv_mode = DC_PRED;
-
- if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, prob_intra))) /* inter MB */
- {
- int rct[4];
- vp8_prob mv_ref_p [VP8_MVREFS-1];
- MV nearest, nearby, best_mv;
-
- if (vp8_read(bc, prob_last))
- {
- mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, prob_gf)));
- }
-
- vp8_find_near_mvs(xd, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias);
-
- vp8_mv_ref_probs(mv_ref_p, rct);
-
- switch (mbmi->mode = read_mv_ref(bc, mv_ref_p))
- {
- case SPLITMV:
- {
- const int s = mbmi->partitioning = vp8_treed_read(
- bc, vp8_mbsplit_tree, vp8_mbsplit_probs
- );
- const int num_p = vp8_mbsplit_count [s];
- const int *const L = vp8_mbsplits [s];
- int j = 0;
-
- do /* for each subset j */
- {
- B_MODE_INFO *const bmi = mbmi->partition_bmi + j;
- MV *const mv = & bmi->mv.as_mv;
-
- int k = -1; /* first block in subset j */
- int mv_contz;
-
- while (j != L[++k])
- if (k >= 16)
-#if CONFIG_DEBUG
- assert(0);
-
-#else
- ;
-#endif
-
- mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
-
- switch (bmi->mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
- {
- case NEW4X4:
- read_mv(bc, mv, (const MV_CONTEXT *) mvc);
- mv->row += best_mv.row;
- mv->col += best_mv.col;
-#ifdef VPX_MODE_COUNT
- vp8_mv_cont_count[mv_contz][3]++;
-#endif
- break;
- case LEFT4X4:
- *mv = vp8_left_bmi(mi, k)->mv.as_mv;
-#ifdef VPX_MODE_COUNT
- vp8_mv_cont_count[mv_contz][0]++;
-#endif
- break;
- case ABOVE4X4:
- *mv = vp8_above_bmi(mi, k, mis)->mv.as_mv;
-#ifdef VPX_MODE_COUNT
- vp8_mv_cont_count[mv_contz][1]++;
-#endif
- break;
- case ZERO4X4:
- *mv = Zero;
-#ifdef VPX_MODE_COUNT
- vp8_mv_cont_count[mv_contz][2]++;
-#endif
- break;
- default:
- break;
- }
-
- /* Fill (uniform) modes, mvs of jth subset.
- Must do it here because ensuing subsets can
- refer back to us via "left" or "above". */
- do
- if (j == L[k])
- mi->bmi[k] = *bmi;
-
- while (++k < 16);
- }
- while (++j < num_p);
- }
-
- *mv = mi->bmi[15].mv.as_mv;
+ /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */
+ if (pbi->common.mb_no_coeff_skip)
+ mbmi->mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
+ else
+ mbmi->mb_skip_coeff = 0;
- break; /* done with SPLITMV */
-
- case NEARMV:
- *mv = nearby;
-
- // Clip "next_nearest" so that it does not extend to far out of image
- if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
- mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
- else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
- mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+ if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra))) /* inter MB */
+ {
+ int rct[4];
+ vp8_prob mv_ref_p [VP8_MVREFS-1];
+ MV nearest, nearby, best_mv;
- if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
- mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
- else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
- mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+ if (vp8_read(bc, pbi->prob_last))
+ {
+ mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, pbi->prob_gf)));
+ }
- goto propagate_mv;
+ vp8_find_near_mvs(&pbi->mb, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias);
- case NEARESTMV:
- *mv = nearest;
+ vp8_mv_ref_probs(mv_ref_p, rct);
- // Clip "next_nearest" so that it does not extend to far out of image
- if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
- mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
- else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
- mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+ mbmi->uv_mode = DC_PRED;
+ switch (mbmi->mode = read_mv_ref(bc, mv_ref_p))
+ {
+ case SPLITMV:
+ {
+ const int s = mbmi->partitioning =
+ vp8_treed_read(bc, vp8_mbsplit_tree, vp8_mbsplit_probs);
+ const int num_p = vp8_mbsplit_count [s];
+ int j = 0;
- if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
- mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
- else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
- mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+ do /* for each subset j */
+ {
+ B_MODE_INFO bmi;
+ MV *const mv = & bmi.mv.as_mv;
- goto propagate_mv;
+ int k; /* first block in subset j */
+ int mv_contz;
+ k = vp8_mbsplit_offset[s][j];
- case ZEROMV:
- *mv = Zero;
- goto propagate_mv;
+ mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
- case NEWMV:
+ switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
+ {
+ case NEW4X4:
read_mv(bc, mv, (const MV_CONTEXT *) mvc);
mv->row += best_mv.row;
mv->col += best_mv.col;
- /* Encoder should not produce invalid motion vectors, but since
- * arbitrary length MVs can be parsed from the bitstream, we
- * need to clamp them here in case we're reading bad data to
- * avoid a crash.
- */
-#if CONFIG_DEBUG
- assert(mv->col >= (xd->mb_to_left_edge - LEFT_TOP_MARGIN));
- assert(mv->col <= (xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN));
- assert(mv->row >= (xd->mb_to_top_edge - LEFT_TOP_MARGIN));
- assert(mv->row <= (xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN));
-#endif
+ #ifdef VPX_MODE_COUNT
+ vp8_mv_cont_count[mv_contz][3]++;
+ #endif
+ break;
+ case LEFT4X4:
+ *mv = vp8_left_bmi(mi, k)->mv.as_mv;
+ #ifdef VPX_MODE_COUNT
+ vp8_mv_cont_count[mv_contz][0]++;
+ #endif
+ break;
+ case ABOVE4X4:
+ *mv = vp8_above_bmi(mi, k, mis)->mv.as_mv;
+ #ifdef VPX_MODE_COUNT
+ vp8_mv_cont_count[mv_contz][1]++;
+ #endif
+ break;
+ case ZERO4X4:
+ *mv = Zero;
+ #ifdef VPX_MODE_COUNT
+ vp8_mv_cont_count[mv_contz][2]++;
+ #endif
+ break;
+ default:
+ break;
+ }
- if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
- mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
- else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
- mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+ mbmi->need_to_clamp_mvs |= (mv->col < mb_to_left_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0;
- if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
- mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
- else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
- mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+ {
+ /* Fill (uniform) modes, mvs of jth subset.
+ Must do it here because ensuing subsets can
+ refer back to us via "left" or "above". */
+ unsigned char *fill_offset;
+ unsigned int fill_count = vp8_mbsplit_fill_count[s];
- propagate_mv: /* same MV throughout */
- {
- //int i=0;
- //do
- //{
- // mi->bmi[i].mv.as_mv = *mv;
- //}
- //while( ++i < 16);
-
- mi->bmi[0].mv.as_mv = *mv;
- mi->bmi[1].mv.as_mv = *mv;
- mi->bmi[2].mv.as_mv = *mv;
- mi->bmi[3].mv.as_mv = *mv;
- mi->bmi[4].mv.as_mv = *mv;
- mi->bmi[5].mv.as_mv = *mv;
- mi->bmi[6].mv.as_mv = *mv;
- mi->bmi[7].mv.as_mv = *mv;
- mi->bmi[8].mv.as_mv = *mv;
- mi->bmi[9].mv.as_mv = *mv;
- mi->bmi[10].mv.as_mv = *mv;
- mi->bmi[11].mv.as_mv = *mv;
- mi->bmi[12].mv.as_mv = *mv;
- mi->bmi[13].mv.as_mv = *mv;
- mi->bmi[14].mv.as_mv = *mv;
- mi->bmi[15].mv.as_mv = *mv;
- }
+ fill_offset = &vp8_mbsplit_fill_offset[s][(unsigned char)j * vp8_mbsplit_fill_count[s]];
- break;
+ do {
+ mi->bmi[ *fill_offset] = bmi;
+ fill_offset++;
- default:;
-#if CONFIG_DEBUG
- assert(0);
-#endif
+ }while (--fill_count);
}
}
- else
- {
- /* MB is intra coded */
-
- int j = 0;
+ while (++j < num_p);
+ }
+ *mv = mi->bmi[15].mv.as_mv;
+
+ break; /* done with SPLITMV */
+
+ case NEARMV:
+ *mv = nearby;
+ /* Clip "next_nearest" so that it does not extend too far out of image */
+ mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
+ mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
+ mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
+ mv->row = (mv->row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->row;
+ goto propagate_mv;
+
+ case NEARESTMV:
+ *mv = nearest;
+ /* Clip "next_nearest" so that it does not extend too far out of image */
+ mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
+ mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
+ mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
+ mv->row = (mv->row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->row;
+ goto propagate_mv;
+
+ case ZEROMV:
+ *mv = Zero;
+ goto propagate_mv;
+
+ case NEWMV:
+ read_mv(bc, mv, (const MV_CONTEXT *) mvc);
+ mv->row += best_mv.row;
+ mv->col += best_mv.col;
+
+ /* Don't need to check this on NEARMV and NEARESTMV modes
+ * since those modes clamp the MV. The NEWMV mode does not,
+ * so signal to the prediction stage whether special
+ * handling may be required.
+ */
+ mbmi->need_to_clamp_mvs = (mv->col < mb_to_left_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0;
+
+ propagate_mv: /* same MV throughout */
+ {
+ /*int i=0;
do
{
- mi->bmi[j].mv.as_mv = Zero;
+ mi->bmi[i].mv.as_mv = *mv;
}
- while (++j < 16);
+ while( ++i < 16);*/
+
+ mi->bmi[0].mv.as_mv = *mv;
+ mi->bmi[1].mv.as_mv = *mv;
+ mi->bmi[2].mv.as_mv = *mv;
+ mi->bmi[3].mv.as_mv = *mv;
+ mi->bmi[4].mv.as_mv = *mv;
+ mi->bmi[5].mv.as_mv = *mv;
+ mi->bmi[6].mv.as_mv = *mv;
+ mi->bmi[7].mv.as_mv = *mv;
+ mi->bmi[8].mv.as_mv = *mv;
+ mi->bmi[9].mv.as_mv = *mv;
+ mi->bmi[10].mv.as_mv = *mv;
+ mi->bmi[11].mv.as_mv = *mv;
+ mi->bmi[12].mv.as_mv = *mv;
+ mi->bmi[13].mv.as_mv = *mv;
+ mi->bmi[14].mv.as_mv = *mv;
+ mi->bmi[15].mv.as_mv = *mv;
+ }
+ break;
+ default:;
+ #if CONFIG_DEBUG
+ assert(0);
+ #endif
+ }
+ }
+ else
+ {
+ /* MB is intra coded */
+ int j = 0;
+ do
+ {
+ mi->bmi[j].mv.as_mv = Zero;
+ }
+ while (++j < 16);
- *mv = Zero;
+ if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED)
+ {
+ j = 0;
+ do
+ {
+ mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pbi->common.fc.bmode_prob);
+ }
+ while (++j < 16);
+ }
- if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pc->fc.ymode_prob)) == B_PRED)
- {
- int j = 0;
+ mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.fc.uv_mode_prob);
+ }
- do
- {
- mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pc->fc.bmode_prob);
- }
- while (++j < 16);
- }
+}
- mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pc->fc.uv_mode_prob);
- }
+void vp8_decode_mode_mvs(VP8D_COMP *pbi)
+{
+ MODE_INFO *mi = pbi->common.mi;
+ int mb_row = -1;
+
+ vp8_mb_mode_mv_init(pbi);
+
+ while (++mb_row < pbi->common.mb_rows)
+ {
+ int mb_col = -1;
+ int mb_to_top_edge;
+ int mb_to_bottom_edge;
- mi++; // next macroblock
+ pbi->mb.mb_to_top_edge =
+ mb_to_top_edge = -((mb_row * 16)) << 3;
+ mb_to_top_edge -= LEFT_TOP_MARGIN;
+
+ pbi->mb.mb_to_bottom_edge =
+ mb_to_bottom_edge = ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3;
+ mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+
+ while (++mb_col < pbi->common.mb_cols)
+ {
+ /*vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
+ if(pbi->common.frame_type == KEY_FRAME)
+ vp8_kfread_modes(pbi, mi, mb_row, mb_col);
+ else
+ vp8_read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
+
+ mi++; /* next macroblock */
}
- mi++; // skip left predictor each row
+ mi++; /* skip left predictor each row */
}
}
+
diff --git a/vp8/decoder/decodemv.h b/vp8/decoder/decodemv.h
index 403007183..940342447 100644
--- a/vp8/decoder/decodemv.h
+++ b/vp8/decoder/decodemv.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h
index ebc5c27b2..25dee8fe8 100644
--- a/vp8/decoder/decoderthreading.h
+++ b/vp8/decoder/decoderthreading.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -14,11 +15,12 @@
#ifndef _DECODER_THREADING_H
#define _DECODER_THREADING_H
-
-extern void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
- MACROBLOCKD *xd);
-extern void vp8_stop_lfthread(VP8D_COMP *pbi);
-extern void vp8_start_lfthread(VP8D_COMP *pbi);
+#if CONFIG_MULTITHREAD
+extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);
extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
extern void vp8_decoder_create_threads(VP8D_COMP *pbi);
+extern int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
+extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);
+#endif
+
#endif
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 01cd7aedf..06204fec6 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -20,9 +21,10 @@
#include "alloccommon.h"
#include "entropymode.h"
#include "quant_common.h"
-#include "segmentation_common.h"
+#include "vpx_scale/vpxscale.h"
+#include "vpx_scale/yv12extend.h"
#include "setupintrarecon.h"
-#include "demode.h"
+
#include "decodemv.h"
#include "extend.h"
#include "vpx_mem/vpx_mem.h"
@@ -38,56 +40,53 @@
void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
{
- int r, c;
int i;
int Q;
VP8_COMMON *const pc = & pbi->common;
for (Q = 0; Q < QINDEX_RANGE; Q++)
{
- pc->Y1dequant[Q][0][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q);
- pc->Y2dequant[Q][0][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
- pc->UVdequant[Q][0][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
+ pc->Y1dequant[Q][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q);
+ pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
+ pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
- // all the ac values = ;
+ /* all the ac values = ; */
for (i = 1; i < 16; i++)
{
int rc = vp8_default_zig_zag1d[i];
- r = (rc >> 2);
- c = (rc & 3);
- pc->Y1dequant[Q][r][c] = (short)vp8_ac_yquant(Q);
- pc->Y2dequant[Q][r][c] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
- pc->UVdequant[Q][r][c] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
+ pc->Y1dequant[Q][rc] = (short)vp8_ac_yquant(Q);
+ pc->Y2dequant[Q][rc] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
+ pc->UVdequant[Q][rc] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
}
}
}
-static void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
+void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
int i;
int QIndex;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
VP8_COMMON *const pc = & pbi->common;
- // Decide whether to use the default or alternate baseline Q value.
+ /* Decide whether to use the default or alternate baseline Q value. */
if (xd->segmentation_enabled)
{
- // Abs Value
+ /* Abs Value */
if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
- // Delta Value
+ /* Delta Value */
else
{
QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
- QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range
+ QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
}
}
else
QIndex = pc->base_qindex;
- // Set up the block level dequant pointers
+ /* Set up the block level dequant pointers */
for (i = 0; i < 16; i++)
{
xd->block[i].dequant = pc->Y1dequant[QIndex];
@@ -108,11 +107,12 @@ static void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
#define RTCD_VTABLE(x) NULL
#endif
-//skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
-// to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
+/* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
+ * to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
+ */
static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
- if (xd->frame_type == KEY_FRAME || xd->mbmi.ref_frame == INTRA_FRAME)
+ if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
vp8_build_intra_predictors_mbuv_s(xd);
@@ -125,42 +125,114 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
}
}
-static void reconstruct_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
+static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
+{
+ /* If the MV points so far into the UMV border that no visible pixels
+ * are used for reconstruction, the subpel part of the MV can be
+ * discarded and the MV limited to 16 pixels with equivalent results.
+ *
+ * This limit kicks in at 19 pixels for the top and left edges, for
+ * the 16 pixels plus 3 taps right of the central pixel when subpel
+ * filtering. The bottom and right edges use 16 pixels plus 2 pixels
+ * left of the central pixel when filtering.
+ */
+ if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
+ mv->col = xd->mb_to_left_edge - (16 << 3);
+ else if (mv->col > xd->mb_to_right_edge + (18 << 3))
+ mv->col = xd->mb_to_right_edge + (16 << 3);
+
+ if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
+ mv->row = xd->mb_to_top_edge - (16 << 3);
+ else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
+ mv->row = xd->mb_to_bottom_edge + (16 << 3);
+}
+
+/* A version of the above function for chroma block MVs.*/
+static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
+{
+ mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ? (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
+ mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ? (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
+
+ mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ? (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
+ mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ? (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
+}
+
+void clamp_mvs(MACROBLOCKD *xd)
{
- if (xd->frame_type == KEY_FRAME || xd->mbmi.ref_frame == INTRA_FRAME)
+ if (xd->mode_info_context->mbmi.mode == SPLITMV)
+ {
+ int i;
+
+ for (i=0; i<16; i++)
+ clamp_mv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd);
+ for (i=16; i<24; i++)
+ clamp_uvmv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd);
+ }
+ else
+ {
+ clamp_mv_to_umv_border(&xd->mode_info_context->mbmi.mv.as_mv, xd);
+ clamp_uvmv_to_umv_border(&xd->block[16].bmi.mv.as_mv, xd);
+ }
+
+}
+
+void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
+{
+ int eobtotal = 0;
+ int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
+
+ if (xd->mode_info_context->mbmi.mb_skip_coeff)
+ {
+ vp8_reset_mb_tokens_context(xd);
+ }
+ else
+ {
+ eobtotal = vp8_decode_mb_tokens(pbi, xd);
+ }
+
+ /* Perform temporary clamping of the MV to be used for prediction */
+ if (do_clamp)
+ {
+ clamp_mvs(xd);
+ }
+
+ xd->mode_info_context->mbmi.dc_diff = 1;
+
+ if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
+ {
+ xd->mode_info_context->mbmi.dc_diff = 0;
+ skip_recon_mb(pbi, xd);
+ return;
+ }
+
+ if (xd->segmentation_enabled)
+ mb_init_dequantizer(pbi, xd);
+
+ /* do prediction */
+ if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
vp8_build_intra_predictors_mbuv(xd);
- if (xd->mbmi.mode != B_PRED)
+ if (xd->mode_info_context->mbmi.mode != B_PRED)
{
vp8_build_intra_predictors_mby_ptr(xd);
- vp8_recon16x16mb(RTCD_VTABLE(recon), xd);
- }
- else
- {
- vp8_recon_intra4x4mb(RTCD_VTABLE(recon), xd);
+ } else {
+ vp8_intra_prediction_down_copy(xd);
}
}
else
{
vp8_build_inter_predictors_mb(xd);
- vp8_recon16x16mb(RTCD_VTABLE(recon), xd);
}
-}
-
-static void de_quantand_idct(VP8D_COMP *pbi, MACROBLOCKD *xd)
-{
- int i;
- BLOCKD *b = &xd->block[24];
-
-
- if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
+ /* dequantization and idct */
+ if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
{
+ BLOCKD *b = &xd->block[24];
DEQUANT_INVOKE(&pbi->dequant, block)(b);
- // do 2nd order transform on the dc block
- if (b->eob > 1)
+ /* do 2nd order transform on the dc block */
+ if (xd->eobs[24] > 1)
{
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
((int *)b->qcoeff)[0] = 0;
@@ -178,86 +250,50 @@ static void de_quantand_idct(VP8D_COMP *pbi, MACROBLOCKD *xd)
((int *)b->qcoeff)[0] = 0;
}
-
- for (i = 0; i < 16; i++)
- {
-
- b = &xd->block[i];
-
- if (b->eob > 1)
- {
- DEQUANT_INVOKE(&pbi->dequant, idct_dc)(b->qcoeff, &b->dequant[0][0], b->diff, 32, xd->block[24].diff[i]);
- }
- else
- {
- IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(xd->block[24].diff[i], b->diff, 32);
- }
- }
-
- for (i = 16; i < 24; i++)
- {
- b = &xd->block[i];
-
- if (b->eob > 1)
- {
- DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, 16);
- }
- else
- {
- IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, 16);
- ((int *)b->qcoeff)[0] = 0;
- }
- }
+ DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
+ (xd->qcoeff, xd->block[0].dequant,
+ xd->predictor, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->eobs, xd->block[24].diff);
}
- else
+ else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
{
- for (i = 0; i < 24; i++)
+ for (i = 0; i < 16; i++)
{
- b = &xd->block[i];
+ BLOCKD *b = &xd->block[i];
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- if (b->eob > 1)
+ if (xd->eobs[i] > 1)
{
- DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, (32 - (i & 16)));
+ DEQUANT_INVOKE(&pbi->dequant, idct_add)
+ (b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
}
else
{
- IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, (32 - (i & 16)));
+ IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+ (b->qcoeff[0] * b->dequant[0], b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
((int *)b->qcoeff)[0] = 0;
}
}
- }
-}
-
-void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
-{
- int eobtotal = 0;
- if (xd->mbmi.mb_skip_coeff)
- {
- vp8_reset_mb_tokens_context(xd);
}
else
{
- eobtotal = vp8_decode_mb_tokens(pbi, xd);
- }
-
- xd->mode_info_context->mbmi.dc_diff = 1;
-
- if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV && eobtotal == 0)
- {
- xd->mode_info_context->mbmi.dc_diff = 0;
- skip_recon_mb(pbi, xd);
- return;
+ DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+ (xd->qcoeff, xd->block[0].dequant,
+ xd->predictor, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->eobs);
}
- if (xd->segmentation_enabled)
- mb_init_dequantizer(pbi, xd);
-
- de_quantand_idct(pbi, xd);
- reconstruct_mb(pbi, xd);
+ DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+ (xd->qcoeff+16*16, xd->block[16].dequant,
+ xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.uv_stride, xd->eobs+16);
}
+
static int get_delta_q(vp8_reader *bc, int prev, int *q_update)
{
int ret_val = 0;
@@ -293,18 +329,17 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
int i;
int recon_yoffset, recon_uvoffset;
int mb_col;
- int recon_y_stride = pc->last_frame.y_stride;
- int recon_uv_stride = pc->last_frame.uv_stride;
+ int ref_fb_idx = pc->lst_fb_idx;
+ int dst_fb_idx = pc->new_fb_idx;
+ int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
- vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
+ vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
recon_yoffset = mb_row * recon_y_stride * 16;
recon_uvoffset = mb_row * recon_uv_stride * 8;
- // reset above block coeffs
+ /* reset above block coeffs */
- xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
- xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
- xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
- xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
+ xd->above_context = pc->above_context;
xd->up_available = (mb_row != 0);
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
@@ -312,10 +347,8 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
{
- // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
- vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
- if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
+ if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
{
for (i = 0; i < 16; i++)
{
@@ -324,48 +357,38 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
}
}
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ /* Distance of Mb to the various image edges.
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ */
xd->mb_to_left_edge = -((mb_col * 16) << 3);
xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
- xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
- xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
- xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
+ xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
- // Select the appropriate reference frame for this MB
- if (xd->mbmi.ref_frame == LAST_FRAME)
- {
- xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
- }
- else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
- {
- // Golden frame reconstruction buffer
- xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
- }
+ /* Select the appropriate reference frame for this MB */
+ if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ ref_fb_idx = pc->lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = pc->gld_fb_idx;
else
- {
- // Alternate reference frame reconstruction buffer
- xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
- }
+ ref_fb_idx = pc->alt_fb_idx;
+
+ xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
vp8_build_uvmvs(xd, pc->full_pixel);
/*
- if(pbi->common.current_video_frame==0 &&mb_col==1 && mb_row==0)
+ if(pc->current_video_frame==0 &&mb_col==1 && mb_row==0)
pbi->debugoutput =1;
else
pbi->debugoutput =0;
*/
- vp8dx_bool_decoder_fill(xd->current_bc);
vp8_decode_macroblock(pbi, xd);
@@ -374,25 +397,17 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
++xd->mode_info_context; /* next mb */
- xd->gf_active_ptr++; // GF useage flag for next MB
+ xd->above_context++;
- xd->above_context[Y1CONTEXT] += 4;
- xd->above_context[UCONTEXT ] += 2;
- xd->above_context[VCONTEXT ] += 2;
- xd->above_context[Y2CONTEXT] ++;
-
- pbi->current_mb_col_main = mb_col;
}
- // adjust to the next row of mbs
+ /* adjust to the next row of mbs */
vp8_extend_mb_row(
- &pc->new_frame,
+ &pc->yv12_fb[dst_fb_idx],
xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
);
++xd->mode_info_context; /* skip prediction column */
-
- pbi->last_mb_row_decoded = mb_row;
}
@@ -432,7 +447,7 @@ static void setup_token_decoder(VP8D_COMP *pbi,
for (i = 0; i < num_part; i++)
{
const unsigned char *partition_size_ptr = cx_data + i * 3;
- unsigned int partition_size;
+ ptrdiff_t partition_size;
/* Calculate the length of this partition. The last partition
* size is implicit.
@@ -446,7 +461,8 @@ static void setup_token_decoder(VP8D_COMP *pbi,
partition_size = user_data_end - partition;
}
- if (partition + partition_size > user_data_end)
+ if (partition + partition_size > user_data_end
+ || partition + partition_size < partition)
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt partition "
"%d length", i + 1);
@@ -473,18 +489,7 @@ static void stop_token_decoder(VP8D_COMP *pbi)
VP8_COMMON *pc = &pbi->common;
if (pc->multi_token_partition != ONE_PARTITION)
- {
- int num_part = (1 << pc->multi_token_partition);
-
- for (i = 0; i < num_part; i++)
- {
- vp8dx_stop_decode(&pbi->mbc[i]);
- }
-
vpx_free(pbi->mbc);
- }
- else
- vp8dx_stop_decode(& pbi->bc2);
}
static void init_frame(VP8D_COMP *pbi)
@@ -494,7 +499,7 @@ static void init_frame(VP8D_COMP *pbi)
if (pc->frame_type == KEY_FRAME)
{
- // Various keyframe initializations
+ /* Various keyframe initializations */
vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
vp8_init_mbmode_probs(pc);
@@ -502,22 +507,23 @@ static void init_frame(VP8D_COMP *pbi)
vp8_default_coef_probs(pc);
vp8_kf_default_bmode_probs(pc->kf_bmode_prob);
- // reset the segment feature data to 0 with delta coding (Default state).
+ /* reset the segment feature data to 0 with delta coding (Default state). */
vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
xd->mb_segement_abs_delta = SEGMENT_DELTADATA;
- // reset the mode ref deltasa for loop filter
+ /* reset the mode ref deltas for loop filter */
vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
- // All buffers are implicitly updated on key frames.
+ /* All buffers are implicitly updated on key frames. */
pc->refresh_golden_frame = 1;
pc->refresh_alt_ref_frame = 1;
pc->copy_buffer_to_gf = 0;
pc->copy_buffer_to_arf = 0;
- // Note that Golden and Altref modes cannot be used on a key frame so
- // ref_frame_sign_bias[] is undefined and meaningless
+ /* Note that Golden and Altref modes cannot be used on a key frame so
+ * ref_frame_sign_bias[] is undefined and meaningless
+ */
pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0;
pc->ref_frame_sign_bias[ALTREF_FRAME] = 0;
}
@@ -528,7 +534,7 @@ static void init_frame(VP8D_COMP *pbi)
else
pc->mcomp_filter_type = BILINEAR;
- // To enable choice of different interploation filters
+ /* To enable choice of different interpolation filters */
if (pc->mcomp_filter_type == SIXTAP)
{
xd->subpixel_predict = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap4x4);
@@ -548,7 +554,7 @@ static void init_frame(VP8D_COMP *pbi)
xd->left_context = &pc->left_context;
xd->mode_info_context = pc->mi;
xd->frame_type = pc->frame_type;
- xd->mbmi.mode = DC_PRED;
+ xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_stride = pc->mode_info_stride;
}
@@ -559,11 +565,14 @@ int vp8_decode_frame(VP8D_COMP *pbi)
MACROBLOCKD *const xd = & pbi->mb;
const unsigned char *data = (const unsigned char *)pbi->Source;
const unsigned char *const data_end = data + pbi->source_sz;
- int first_partition_length_in_bytes;
+ ptrdiff_t first_partition_length_in_bytes;
int mb_row;
int i, j, k, l;
const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
+ if (data_end - data < 3)
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet");
pc->frame_type = (FRAME_TYPE)(data[0] & 1);
pc->version = (data[0] >> 1) & 7;
pc->show_frame = (data[0] >> 4) & 1;
@@ -571,7 +580,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
(data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
data += 3;
- if (data + first_partition_length_in_bytes > data_end)
+ if (data + first_partition_length_in_bytes > data_end
+ || data + first_partition_length_in_bytes < data)
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt partition 0 length");
vp8_setup_version(pc);
@@ -581,7 +591,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
const int Width = pc->Width;
const int Height = pc->Height;
- // vet via sync code
+ /* vet via sync code */
if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a)
vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame sync code");
@@ -594,6 +604,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
if (Width != pc->Width || Height != pc->Height)
{
+ int prev_mb_rows = pc->mb_rows;
+
if (pc->Width <= 0)
{
pc->Width = Width;
@@ -608,9 +620,14 @@ int vp8_decode_frame(VP8D_COMP *pbi)
"Invalid frame height");
}
- if (vp8_alloc_frame_buffers(&pbi->common, pc->Width, pc->Height))
+ if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height))
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffers");
+
+#if CONFIG_MULTITHREAD
+ if (pbi->b_multithreaded_rd)
+ vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows);
+#endif
}
}
@@ -630,11 +647,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc);
}
- // Is segmentation enabled
+ /* Is segmentation enabled */
xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc);
if (xd->segmentation_enabled)
{
- // Signal whether or not the segmentation map is being explicitly updated this frame.
+ /* Signal whether or not the segmentation map is being explicitly updated this frame. */
xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc);
xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc);
@@ -644,12 +661,12 @@ int vp8_decode_frame(VP8D_COMP *pbi)
vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
- // For each segmentation feature (Quant and loop filter level)
+ /* For each segmentation feature (Quant and loop filter level) */
for (i = 0; i < MB_LVL_MAX; i++)
{
for (j = 0; j < MAX_MB_SEGMENTS; j++)
{
- // Frame level data
+ /* Frame level data */
if (vp8_read_bit(bc))
{
xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]);
@@ -665,60 +682,60 @@ int vp8_decode_frame(VP8D_COMP *pbi)
if (xd->update_mb_segmentation_map)
{
- // Which macro block level features are enabled
+ /* Which macro block level features are enabled */
vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
#if CONFIG_SEGMENTATION
- // Read the probs used to decode the segment id for each macro block.
+ /* Read the probs used to decode the segment id for each macro block. */
for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++)
#else
for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
#endif
{
- // If not explicitly set value is defaulted to 255 by memset above
+ /* If not explicitly set value is defaulted to 255 by memset above */
if (vp8_read_bit(bc))
xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8);
}
}
}
- // Read the loop filter level and type
+ /* Read the loop filter level and type */
pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc);
pc->filter_level = vp8_read_literal(bc, 6);
pc->sharpness_level = vp8_read_literal(bc, 3);
- // Read in loop filter deltas applied at the MB level based on mode or ref frame.
+ /* Read in loop filter deltas applied at the MB level based on mode or ref frame. */
xd->mode_ref_lf_delta_update = 0;
xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc);
if (xd->mode_ref_lf_delta_enabled)
{
- // Do the deltas need to be updated
+ /* Do the deltas need to be updated */
xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc);
if (xd->mode_ref_lf_delta_update)
{
- // Send update
+ /* Send update */
for (i = 0; i < MAX_REF_LF_DELTAS; i++)
{
if (vp8_read_bit(bc))
{
- //sign = vp8_read_bit( bc );
+ /*sign = vp8_read_bit( bc );*/
xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6);
- if (vp8_read_bit(bc)) // Apply sign
+ if (vp8_read_bit(bc)) /* Apply sign */
xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1;
}
}
- // Send update
+ /* Send update */
for (i = 0; i < MAX_MODE_LF_DELTAS; i++)
{
if (vp8_read_bit(bc))
{
- //sign = vp8_read_bit( bc );
+ /*sign = vp8_read_bit( bc );*/
xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6);
- if (vp8_read_bit(bc)) // Apply sign
+ if (vp8_read_bit(bc)) /* Apply sign */
xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1;
}
}
@@ -728,11 +745,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
setup_token_decoder(pbi, data + first_partition_length_in_bytes);
xd->current_bc = &pbi->bc2;
- // Read the default quantizers.
+ /* Read the default quantizers. */
{
int Q, q_update;
- Q = vp8_read_literal(bc, 7); // AC 1st order Q = default
+ Q = vp8_read_literal(bc, 7); /* AC 1st order Q = default */
pc->base_qindex = Q;
q_update = 0;
pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update);
@@ -744,20 +761,21 @@ int vp8_decode_frame(VP8D_COMP *pbi)
if (q_update)
vp8cx_init_de_quantizer(pbi);
- // MB level dequantizer setup
+ /* MB level dequantizer setup */
mb_init_dequantizer(pbi, &pbi->mb);
}
- // Determine if the golden frame or ARF buffer should be updated and how.
- // For all non key frames the GF and ARF refresh flags and sign bias
- // flags must be set explicitly.
+ /* Determine if the golden frame or ARF buffer should be updated and how.
+ * For all non key frames the GF and ARF refresh flags and sign bias
+ * flags must be set explicitly.
+ */
if (pc->frame_type != KEY_FRAME)
{
- // Should the GF or ARF be updated from the current frame
+ /* Should the GF or ARF be updated from the current frame */
pc->refresh_golden_frame = vp8_read_bit(bc);
pc->refresh_alt_ref_frame = vp8_read_bit(bc);
- // Buffer to buffer copy flags.
+ /* Buffer to buffer copy flags. */
pc->copy_buffer_to_gf = 0;
if (!pc->refresh_golden_frame)
@@ -793,9 +811,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
fclose(z);
}
- vp8dx_bool_decoder_fill(bc);
{
- // read coef probability tree
+ /* read coef probability tree */
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < COEF_BANDS; j++)
@@ -813,57 +830,54 @@ int vp8_decode_frame(VP8D_COMP *pbi)
}
}
- vpx_memcpy(&xd->pre, &pc->last_frame, sizeof(YV12_BUFFER_CONFIG));
- vpx_memcpy(&xd->dst, &pc->new_frame, sizeof(YV12_BUFFER_CONFIG));
+ vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
+ vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
#if CONFIG_SEGMENTATION
// Create the encoder segmentation map and set all entries to 0
CHECK_MEM_ERROR(pbi->segmentation_map, vpx_calloc((pc->mb_rows * pc->mb_cols), 1));
#endif
- // set up frame new frame for intra coded blocks
- vp8_setup_intra_recon(&pc->new_frame);
+ /* set up the new frame for intra coded blocks */
+ if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level))
+ vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
vp8_setup_block_dptrs(xd);
vp8_build_block_doffsets(xd);
- // clear out the coeff buffer
+ /* clear out the coeff buffer */
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
- // Read the mb_no_coeff_skip flag
+ /* Read the mb_no_coeff_skip flag */
pc->mb_no_coeff_skip = (int)vp8_read_bit(bc);
- if (pc->frame_type == KEY_FRAME)
- vp8_kfread_modes(pbi);
- else
- vp8_decode_mode_mvs(pbi);
- // reset since these guys are used as iterators
- vpx_memset(pc->above_context[Y1CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 4);
- vpx_memset(pc->above_context[UCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
- vpx_memset(pc->above_context[VCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
- vpx_memset(pc->above_context[Y2CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols);
-
- xd->gf_active_ptr = (signed char *)pc->gf_active_flags; // Point to base of GF active flags data structure
+ vp8_decode_mode_mvs(pbi);
+ vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));
-
- if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
- vp8_start_lfthread(pbi);
-
- if (pbi->b_multithreaded_rd && pbi->common.multi_token_partition != ONE_PARTITION)
+ if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION)
{
- vp8_mtdecode_mb_rows(pbi, xd);
+ vp8mt_decode_mb_rows(pbi, xd);
+ if(pbi->common.filter_level)
+ {
+ /*vp8_mt_loop_filter_frame(pbi);*/ /*cm, &pbi->mb, cm->filter_level);*/
+
+ pc->last_frame_type = pc->frame_type;
+ pc->last_filter_type = pc->filter_type;
+ pc->last_sharpness_level = pc->sharpness_level;
+ }
+ vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/
}
else
{
int ibc = 0;
- int num_part = 1 << pbi->common.multi_token_partition;
+ int num_part = 1 << pc->multi_token_partition;
- // Decode the individual macro block
+ /* Decode the individual macro block */
for (mb_row = 0; mb_row < pc->mb_rows; mb_row++)
{
@@ -878,20 +892,19 @@ int vp8_decode_frame(VP8D_COMP *pbi)
vp8_decode_mb_row(pbi, pc, mb_row, xd);
}
-
- pbi->last_mb_row_decoded = mb_row;
}
stop_token_decoder(pbi);
- vp8dx_stop_decode(bc);
-
- // vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos);
+ /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */
- // If this was a kf or Gf note the Q used
- if ((pc->frame_type == KEY_FRAME) || (pc->refresh_golden_frame) || pbi->common.refresh_alt_ref_frame)
+ /* If this was a kf or Gf note the Q used */
+ if ((pc->frame_type == KEY_FRAME) ||
+ pc->refresh_golden_frame || pc->refresh_alt_ref_frame)
+ {
pc->last_kf_gf_q = pc->base_qindex;
+ }
if (pc->refresh_entropy_probs == 0)
{
diff --git a/vp8/decoder/demode.c b/vp8/decoder/demode.c
deleted file mode 100644
index 74fe91803..000000000
--- a/vp8/decoder/demode.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "onyxd_int.h"
-#include "entropymode.h"
-#include "findnearmv.h"
-
-
-int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
-{
- const int i = vp8_treed_read(bc, vp8_bmode_tree, p);
-
- return i;
-}
-
-
-int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p)
-{
- const int i = vp8_treed_read(bc, vp8_ymode_tree, p);
-
- return i;
-}
-
-int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p)
-{
- const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p);
-
- return i;
-}
-
-
-
-int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
-{
- const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p);
-
- return i;
-}
-
-void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
-{
- // Is segmentation enabled
- if (x->segmentation_enabled && x->update_mb_segmentation_map)
- {
- // If so then read the segment id.
- if (vp8_read(r, x->mb_segment_tree_probs[0]))
- mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
- else
- mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
- }
-}
-
-void vp8_kfread_modes(VP8D_COMP *pbi)
-{
- VP8_COMMON *const cp = & pbi->common;
- vp8_reader *const bc = & pbi->bc;
-
- MODE_INFO *m = cp->mi;
- const int ms = cp->mode_info_stride;
-#if CONFIG_SEGMENTATION
- int left_id,above_id;
- int i;
-#endif
- int mb_row = -1;
- vp8_prob prob_skip_false = 0;
-
- if (cp->mb_no_coeff_skip)
- prob_skip_false = (vp8_prob)(vp8_read_literal(bc, 8));
-
- while (++mb_row < cp->mb_rows)
- {
- int mb_col = -1;
-
- while (++mb_col < cp->mb_cols)
- {
- MB_PREDICTION_MODE y_mode;
- vp8dx_bool_decoder_fill(bc);
-
- // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
- // By default on a key frame reset all MBs to segment 0
- m->mbmi.segment_id = 0;
-
- if (pbi->mb.update_mb_segmentation_map)
- {
-
-#if CONFIG_SEGMENTATION
- vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
- pbi->segmentation_map[(mb_row * cp->mb_cols) + mb_col] = m->mbmi.segment_id;
-#else
- vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
-#endif
- }
-
- // Read the macroblock coeff skip flag if this feature is in use, else default to 0
- if (cp->mb_no_coeff_skip)
- m->mbmi.mb_skip_coeff = vp8_read(bc, prob_skip_false);
- else
- m->mbmi.mb_skip_coeff = 0;
-
- y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, cp->kf_ymode_prob);
-
- m->mbmi.ref_frame = INTRA_FRAME;
-
- if ((m->mbmi.mode = y_mode) == B_PRED)
- {
- int i = 0;
-
- do
- {
- const B_PREDICTION_MODE A = vp8_above_bmi(m, i, ms)->mode;
- const B_PREDICTION_MODE L = vp8_left_bmi(m, i)->mode;
-
- m->bmi[i].mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, cp->kf_bmode_prob [A] [L]);
- }
- while (++i < 16);
- }
- else
- {
- int BMode;
- int i = 0;
-
- switch (y_mode)
- {
- case DC_PRED:
- BMode = B_DC_PRED;
- break;
- case V_PRED:
- BMode = B_VE_PRED;
- break;
- case H_PRED:
- BMode = B_HE_PRED;
- break;
- case TM_PRED:
- BMode = B_TM_PRED;
- break;
- default:
- BMode = B_DC_PRED;
- break;
- }
-
- do
- {
- m->bmi[i].mode = (B_PREDICTION_MODE)BMode;
- }
- while (++i < 16);
- }
-
- (m++)->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, cp->kf_uv_mode_prob);
- }
-
- m++; // skip the border
- }
-}
diff --git a/vp8/decoder/demode.h b/vp8/decoder/demode.h
deleted file mode 100644
index 51bbc5e7a..000000000
--- a/vp8/decoder/demode.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "onyxd_int.h"
-
-/* Read (intra) modes for all blocks in a keyframe */
-
-void vp8_kfread_modes(VP8D_COMP *pbi);
-
-/* Intra mode for a Y subblock */
-
-int vp8_read_bmode(vp8_reader *, const vp8_prob *);
-
-/* MB intra Y mode trees differ for key and inter frames. */
-
-int vp8_read_ymode(vp8_reader *, const vp8_prob *);
-int vp8_kfread_ymode(vp8_reader *, const vp8_prob *);
-
-/* MB intra UV mode trees are the same for key and inter frames. */
-
-int vp8_read_uv_mode(vp8_reader *, const vp8_prob *);
-
-/* Read any macroblock-level features that may be present. */
-
-void vp8_read_mb_features(vp8_reader *, MB_MODE_INFO *, MACROBLOCKD *);
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 14798d9af..84a9fd943 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -23,7 +24,7 @@ void vp8_dequantize_b_c(BLOCKD *d)
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = &d->dequant[0][0];
+ short *DQC = d->dequant;
for (i = 0; i < 16; i++)
{
@@ -31,8 +32,12 @@ void vp8_dequantize_b_c(BLOCKD *d)
}
}
-void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch)
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride)
{
+ short output[16];
+ short *diff_ptr = output;
+ int r, c;
int i;
for (i = 0; i < 16; i++)
@@ -40,13 +45,40 @@ void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch)
input[i] = dq[i] * input[i];
}
- vp8_short_idct4x4llm_c(input, output, pitch);
+ /* the idct halves ( >> 1) the pitch */
+ vp8_short_idct4x4llm_c(input, output, 4 << 1);
+
vpx_memset(input, 0, 32);
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ int a = diff_ptr[c] + pred[c];
+
+ if (a < 0)
+ a = 0;
+
+ if (a > 255)
+ a = 255;
+
+ dest[c] = (unsigned char) a;
+ }
+
+ dest += stride;
+ diff_ptr += 4;
+ pred += pitch;
+ }
}
-void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc)
+void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride,
+ int Dc)
{
int i;
+ short output[16];
+ short *diff_ptr = output;
+ int r, c;
input[0] = (short)Dc;
@@ -55,6 +87,28 @@ void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, in
input[i] = dq[i] * input[i];
}
- vp8_short_idct4x4llm_c(input, output, pitch);
+ /* the idct halves ( >> 1) the pitch */
+ vp8_short_idct4x4llm_c(input, output, 4 << 1);
+
vpx_memset(input, 0, 32);
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ int a = diff_ptr[c] + pred[c];
+
+ if (a < 0)
+ a = 0;
+
+ if (a > 255)
+ a = 255;
+
+ dest[c] = (unsigned char) a;
+ }
+
+ dest += stride;
+ diff_ptr += 4;
+ pred += pitch;
+ }
}
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index d16b02e58..b78e39c1d 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -15,11 +16,31 @@
#define prototype_dequant_block(sym) \
void sym(BLOCKD *x)
-#define prototype_dequant_idct(sym) \
- void sym(short *input, short *dq, short *output, int pitch)
+#define prototype_dequant_idct_add(sym) \
+ void sym(short *input, short *dq, \
+ unsigned char *pred, unsigned char *output, \
+ int pitch, int stride)
-#define prototype_dequant_idct_dc(sym) \
- void sym(short *input, short *dq, short *output, int pitch, int dc)
+#define prototype_dequant_dc_idct_add(sym) \
+ void sym(short *input, short *dq, \
+ unsigned char *pred, unsigned char *output, \
+ int pitch, int stride, \
+ int dc)
+
+#define prototype_dequant_dc_idct_add_y_block(sym) \
+ void sym(short *q, short *dq, \
+ unsigned char *pre, unsigned char *dst, \
+ int stride, char *eobs, short *dc)
+
+#define prototype_dequant_idct_add_y_block(sym) \
+ void sym(short *q, short *dq, \
+ unsigned char *pre, unsigned char *dst, \
+ int stride, char *eobs)
+
+#define prototype_dequant_idct_add_uv_block(sym) \
+ void sym(short *q, short *dq, \
+ unsigned char *pre, unsigned char *dst_u, \
+ unsigned char *dst_v, int stride, char *eobs)
#if ARCH_X86 || ARCH_X86_64
#include "x86/dequantize_x86.h"
@@ -34,25 +55,52 @@
#endif
extern prototype_dequant_block(vp8_dequant_block);
-#ifndef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_c
+#ifndef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_c
+#endif
+extern prototype_dequant_idct_add(vp8_dequant_idct_add);
+
+#ifndef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c
+#endif
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add);
+
+#ifndef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_c
#endif
-extern prototype_dequant_idct(vp8_dequant_idct);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block);
-#ifndef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_c
+#ifndef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c
#endif
-extern prototype_dequant_idct_dc(vp8_dequant_idct_dc);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block);
+
+#ifndef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c
+#endif
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block);
typedef prototype_dequant_block((*vp8_dequant_block_fn_t));
-typedef prototype_dequant_idct((*vp8_dequant_idct_fn_t));
-typedef prototype_dequant_idct_dc((*vp8_dequant_idct_dc_fn_t));
+
+typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t));
+
+typedef prototype_dequant_dc_idct_add((*vp8_dequant_dc_idct_add_fn_t));
+
+typedef prototype_dequant_dc_idct_add_y_block((*vp8_dequant_dc_idct_add_y_block_fn_t));
+
+typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t));
+
+typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t));
+
typedef struct
{
- vp8_dequant_block_fn_t block;
- vp8_dequant_idct_fn_t idct;
- vp8_dequant_idct_dc_fn_t idct_dc;
+ vp8_dequant_block_fn_t block;
+ vp8_dequant_idct_add_fn_t idct_add;
+ vp8_dequant_dc_idct_add_fn_t dc_idct_add;
+ vp8_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block;
+ vp8_dequant_idct_add_y_block_fn_t idct_add_y_block;
+ vp8_dequant_idct_add_uv_block_fn_t idct_add_uv_block;
} vp8_dequant_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index a42f18dd7..7d013d240 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,12 +14,12 @@
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
+#include "detokenize.h"
-#define BR_COUNT 8
#define BOOL_DATA UINT8
#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-DECLARE_ALIGNED(16, UINT16, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
+DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2
@@ -43,49 +44,72 @@ typedef struct
DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
{
- { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN
- { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN
- { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN
- { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN
- { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN
- { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1
- { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2
- { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3
- { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4
- { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY5
- { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, //DCT_VAL_CATEGORY6
- { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN
+ { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* ZERO_TOKEN */
+ { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* ONE_TOKEN */
+ { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* TWO_TOKEN */
+ { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* THREE_TOKEN */
+ { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* FOUR_TOKEN */
+ { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY1 */
+ { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY2 */
+ { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY3 */
+ { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY4 */
+ { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY5 */
+ { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, /* DCT_VAL_CATEGORY6 */
+ { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* EOB TOKEN */
};
void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
{
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- for (i = 0; i < 24; i++)
+ /* Clear entropy contexts for Y2 blocks */
+ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{
-
- a = A[ vp8_block2context[i] ] + vp8_block2above[i];
- l = L[ vp8_block2context[i] ] + vp8_block2left[i];
-
- *a = *l = 0;
+ vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
}
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+ else
{
- a = A[Y2CONTEXT] + vp8_block2above[24];
- l = L[Y2CONTEXT] + vp8_block2left[24];
- *a = *l = 0;
+ vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+ vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
}
+}
+#if CONFIG_ARM_ASM_DETOK
+/* mashup of vp8_block2left and vp8_block2above so we only need one pointer
+ * for the assembly version.
+ */
+DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) =
+{
+ /* vp8_block2left */
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+ /* vp8_block2above */
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
+};
+void vp8_init_detokenizer(VP8D_COMP *dx)
+{
+ const VP8_COMMON *const oc = & dx->common;
+ MACROBLOCKD *x = & dx->mb;
+
+ dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree;
+ dx->detoken.ptr_block2leftabove = vp8_block2leftabove;
+ dx->detoken.ptr_coef_bands_x = vp8_coef_bands_x;
+ dx->detoken.scan = vp8_default_zig_zag1d;
+ dx->detoken.teb_base_ptr = vp8d_token_extra_bits2;
+ dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
+
+ dx->detoken.coef_probs[0] = (oc->fc.coef_probs [0] [ 0 ] [0]);
+ dx->detoken.coef_probs[1] = (oc->fc.coef_probs [1] [ 0 ] [0]);
+ dx->detoken.coef_probs[2] = (oc->fc.coef_probs [2] [ 0 ] [0]);
+ dx->detoken.coef_probs[3] = (oc->fc.coef_probs [3] [ 0 ] [0]);
}
-DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
+#endif
+
+DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
+#define FILL \
+ if(count < 0) \
+ VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+
#define NORMALIZE \
/*if(range < 0x80)*/ \
{ \
@@ -93,17 +117,13 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
range <<= shift; \
value <<= shift; \
count -= shift; \
- if(count <= 0) \
- { \
- count += BR_COUNT ; \
- value |= (*bufptr) << (BR_COUNT-count); \
- bufptr = br_ptr_advance(bufptr, 1); \
- } \
}
#define DECODE_AND_APPLYSIGN(value_to_sign) \
split = (range + 1) >> 1; \
- if ( (value >> 8) < split ) \
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+ FILL \
+ if ( value < bigsplit ) \
{ \
range = split; \
v= value_to_sign; \
@@ -111,28 +131,25 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
else \
{ \
range = range-split; \
- value = value-(split<<8); \
+ value = value-bigsplit; \
v = -value_to_sign; \
} \
range +=range; \
value +=value; \
- if (!--count) \
- { \
- count = BR_COUNT; \
- value |= *bufptr; \
- bufptr = br_ptr_advance(bufptr, 1); \
- }
+ count--;
#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
{ \
split = 1 + ((( probability*(range-1) ) )>> 8); \
- if ( (value >> 8) < split ) \
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+ FILL \
+ if ( value < bigsplit ) \
{ \
range = split; \
NORMALIZE \
goto branch; \
} \
- value -= (split<<8); \
+ value -= bigsplit; \
range = range - split; \
NORMALIZE \
}
@@ -140,7 +157,9 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
{ \
split = 1 + ((( probability*(range-1) ) ) >> 8); \
- if ( (value >> 8) < split ) \
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+ FILL \
+ if ( value < bigsplit ) \
{ \
range = split; \
NORMALIZE \
@@ -151,7 +170,7 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
goto branch; \
} goto BLOCK_FINISHED; /*for malformed input */\
} \
- value -= (split<<8); \
+ value -= bigsplit; \
range = range - split; \
NORMALIZE \
}
@@ -169,10 +188,12 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
- if(value >= (split<<8))\
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+ FILL \
+ if(value >= bigsplit)\
{\
range = range-split;\
- value = value-(split<<8);\
+ value = value-bigsplit;\
val += ((UINT16)1<<bits_count);\
}\
else\
@@ -181,14 +202,45 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
}\
NORMALIZE
+#if CONFIG_ARM_ASM_DETOK
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
{
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+ int eobtotal = 0;
+ int i, type;
+
+ dx->detoken.current_bc = x->current_bc;
+ dx->detoken.A = x->above_context;
+ dx->detoken.L = x->left_context;
+
+ type = 3;
+
+ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+ {
+ type = 1;
+ eobtotal -= 16;
+ }
+
+ vp8_decode_mb_tokens_v6(&dx->detoken, type);
+
+ for (i = 0; i < 25; i++)
+ {
+ x->eobs[i] = dx->detoken.eob[i];
+ eobtotal += dx->detoken.eob[i];
+ }
+
+ return eobtotal;
+}
+#else
+int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
+{
+ ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
+ ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
const VP8_COMMON *const oc = & dx->common;
BOOL_DECODER *bc = x->current_bc;
+ char *eobs = x->eobs;
+
ENTROPY_CONTEXT *a;
ENTROPY_CONTEXT *l;
int i;
@@ -198,11 +250,13 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
register int count;
const BOOL_DATA *bufptr;
+ const BOOL_DATA *bufend;
register unsigned int range;
- register unsigned int value;
+ VP8_BD_VALUE value;
const int *scan;
register unsigned int shift;
UINT32 split;
+ VP8_BD_VALUE bigsplit;
INT16 *qcoeff_ptr;
const vp8_prob *coef_probs;
@@ -210,46 +264,44 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
int stop;
INT16 val, bits_count;
INT16 c;
- INT16 t;
INT16 v;
const vp8_prob *Prob;
- //int *scan;
type = 3;
i = 0;
stop = 16;
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+ scan = vp8_default_zig_zag1d;
+ qcoeff_ptr = &x->qcoeff[0];
+
+ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{
i = 24;
stop = 24;
type = 1;
- qcoeff_ptr = &x->qcoeff[24*16];
- scan = vp8_default_zig_zag1d;
+ qcoeff_ptr += 24*16;
eobtotal -= 16;
}
- else
- {
- scan = vp8_default_zig_zag1d;
- qcoeff_ptr = &x->qcoeff[0];
- }
+ bufend = bc->user_buffer_end;
+ bufptr = bc->user_buffer;
+ value = bc->value;
count = bc->count;
range = bc->range;
- value = bc->value;
- bufptr = bc->read_ptr;
coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
BLOCK_LOOP:
- a = A[ vp8_block2context[i] ] + vp8_block2above[i];
- l = L[ vp8_block2context[i] ] + vp8_block2left[i];
+ a = A + vp8_block2above[i];
+ l = L + vp8_block2left[i];
+
c = (INT16)(!type);
- VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
+ /*Dest = ((A)!=0) + ((B)!=0);*/
+ VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
Prob = coef_probs;
- Prob += t * ENTROPY_NODES;
+ Prob += v * ENTROPY_NODES;
DO_WHILE:
Prob += vp8_coef_bands_x[c];
@@ -336,9 +388,8 @@ ONE_CONTEXT_NODE_0_:
qcoeff_ptr [ scan[15] ] = (INT16) v;
BLOCK_FINISHED:
- t = ((x->block[i].eob = c) != !type); // any nonzero data?
- eobtotal += x->block[i].eob;
- *a = *l = t;
+ *a = *l = ((eobs[i] = c) != !type); /* any nonzero data? */
+ eobtotal += c;
qcoeff_ptr += 16;
i++;
@@ -348,12 +399,11 @@ BLOCK_FINISHED:
if (i == 25)
{
- scan = vp8_default_zig_zag1d;//x->scan_order1d;
type = 0;
i = 0;
stop = 16;
coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
- qcoeff_ptr = &x->qcoeff[0];
+ qcoeff_ptr -= (24*16 + 16);
goto BLOCK_LOOP;
}
@@ -365,10 +415,12 @@ BLOCK_FINISHED:
goto BLOCK_LOOP;
}
- bc->count = count;
+ FILL
+ bc->user_buffer = bufptr;
bc->value = value;
+ bc->count = count;
bc->range = range;
- bc->read_ptr = bufptr;
return eobtotal;
}
+#endif /*!CONFIG_ARM_ASM_DETOK*/
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
index 6a9a47607..294a4a55d 100644
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -1,19 +1,24 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef detokenize_h
-#define detokenize_h 1
+#ifndef DETOKENIZE_H
+#define DETOKENIZE_H
#include "onyxd_int.h"
+#if ARCH_ARM
+#include "arm/detokenize_arm.h"
+#endif
+
void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
-#endif /* detokenize_h */
+#endif /* DETOKENIZE_H */
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 302b64bf8..2e284729b 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,19 +14,22 @@
#include "onyxd_int.h"
extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
+extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
void vp8_dmachine_specific_config(VP8D_COMP *pbi)
{
- // Pure C:
+ /* Pure C: */
#if CONFIG_RUNTIME_CPU_DETECT
- pbi->mb.rtcd = &pbi->common.rtcd;
- pbi->dequant.block = vp8_dequantize_b_c;
- pbi->dequant.idct = vp8_dequant_idct_c;
- pbi->dequant.idct_dc = vp8_dequant_dc_idct_c;
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.stop = vp8dx_stop_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
-#if 0 //For use with RTCD, when implemented
+ pbi->mb.rtcd = &pbi->common.rtcd;
+ pbi->dequant.block = vp8_dequantize_b_c;
+ pbi->dequant.idct_add = vp8_dequant_idct_add_c;
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_c;
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
+ pbi->dboolhuff.start = vp8dx_start_decode_c;
+ pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
+#if 0 /*For use with RTCD, when implemented*/
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
#endif
@@ -34,4 +38,8 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
#if ARCH_X86 || ARCH_X86_64
vp8_arch_x86_decode_init(pbi);
#endif
+
+#if ARCH_ARM
+ vp8_arch_arm_decode_init(pbi);
+#endif
}
diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c
new file mode 100644
index 000000000..c98bd5bb8
--- /dev/null
+++ b/vp8/decoder/idct_blk.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride,
+ int Dc);
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride);
+void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
+ unsigned char *dst_ptr, int pitch, int stride);
+
+/* Reconstruct a 4x4 grid of 4x4 blocks whose DC terms arrive separately.
+ *
+ * q      - coefficients, 16 shorts per block, blocks in raster order
+ * dq     - dequantisation factors, passed through to the per-block routine
+ * pre    - prediction buffer; rows of blocks are 64 bytes apart and the
+ *          per-block routines are told its pitch is 16
+ * dst    - destination; rows of blocks are 4*stride bytes apart
+ * stride - destination stride, passed through to the per-block routines
+ * eobs   - one entry per block; a value above 1 selects the full
+ *          dequant + IDCT path, anything else the DC-only path
+ * dc     - one separately supplied DC value per block
+ */
+void vp8_dequant_dc_idct_add_y_block_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int blk;
+
+    for (blk = 0; blk < 16; blk++)
+    {
+        const int row = blk >> 2;
+        const int col = blk & 3;
+        unsigned char *pred = pre + row * 64 + col * 4;
+        unsigned char *out = dst + row * 4 * stride + col * 4;
+
+        if (eobs[blk] > 1)
+            vp8_dequant_dc_idct_add_c (q + blk * 16, dq, pred, out, 16, stride, dc[blk]);
+        else
+            vp8_dc_only_idct_add_c (dc[blk], pred, out, 16, stride);
+    }
+}
+
+/* Reconstruct a 4x4 grid of 4x4 blocks, taking each DC term from q[0].
+ *
+ * q      - coefficients, 16 shorts per block, blocks in raster order
+ * dq     - dequantisation factors; dq[0] scales the DC term on the
+ *          DC-only path
+ * pre    - prediction buffer; rows of blocks are 64 bytes apart and the
+ *          per-block routines are told its pitch is 16
+ * dst    - destination; rows of blocks are 4*stride bytes apart
+ * stride - destination stride, passed through to the per-block routines
+ * eobs   - one entry per block; a value above 1 selects the full
+ *          dequant + IDCT path, anything else the DC-only path
+ */
+void vp8_dequant_idct_add_y_block_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i, j;
+
+    for (i = 0; i < 4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_c (q, dq, pre, dst, 16, stride);
+            else
+            {
+                vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dst, 16, stride);
+                /* Clear the first two coefficients with plain short stores;
+                 * a single store through an int pointer would break the
+                 * aliasing rules and assume int alignment of q.
+                 */
+                q[0] = 0;
+                q[1] = 0;
+            }
+
+            q += 16;
+            pre += 4;
+            dst += 4;
+        }
+
+        /* step from the end of this row of blocks to the start of the next */
+        pre += 64 - 16;
+        dst += 4*stride - 16;
+    }
+}
+
+/* Reconstruct two 2x2 grids of 4x4 blocks, the first into dstu and the
+ * second into dstv.
+ *
+ * q      - coefficients, 16 shorts per block; the four blocks for dstu
+ *          are followed by the four blocks for dstv
+ * dq     - dequantisation factors; dq[0] scales the DC term on the
+ *          DC-only path
+ * pre    - prediction buffer, walked continuously across both grids; rows
+ *          of blocks are 32 bytes apart and the per-block routines are
+ *          told its pitch is 8
+ * dstu   - destination of the first grid
+ * dstv   - destination of the second grid
+ * stride - destination stride, shared by both grids
+ * eobs   - one entry per block, eight in total; a value above 1 selects
+ *          the full dequant + IDCT path, anything else the DC-only path
+ */
+void vp8_dequant_idct_add_uv_block_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    unsigned char *plane_dst[2];
+    int plane, i, j;
+
+    plane_dst[0] = dstu;
+    plane_dst[1] = dstv;
+
+    /* q, pre and eobs deliberately carry over from one plane to the next */
+    for (plane = 0; plane < 2; plane++)
+    {
+        unsigned char *dst = plane_dst[plane];
+
+        for (i = 0; i < 2; i++)
+        {
+            for (j = 0; j < 2; j++)
+            {
+                if (*eobs++ > 1)
+                    vp8_dequant_idct_add_c (q, dq, pre, dst, 8, stride);
+                else
+                {
+                    vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dst, 8, stride);
+                    /* Clear the first two coefficients with plain short
+                     * stores; a single store through an int pointer would
+                     * break the aliasing rules and assume int alignment.
+                     */
+                    q[0] = 0;
+                    q[1] = 0;
+                }
+
+                q += 16;
+                pre += 4;
+                dst += 4;
+            }
+
+            /* step from the end of this row of blocks to the next row */
+            pre += 32 - 8;
+            dst += 4*stride - 8;
+        }
+    }
+}
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 8d2b267a9..063b6a468 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -23,18 +24,19 @@
#include "threading.h"
#include "decoderthreading.h"
#include <stdio.h>
-#include "segmentation_common.h"
+
#include "quant_common.h"
#include "vpx_scale/vpxscale.h"
#include "systemdependent.h"
#include "vpx_ports/vpx_timer.h"
-
+#include "detokenize.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
extern void vp8_init_loop_filter(VP8_COMMON *cm);
-
extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
-// DEBUG code
#if CONFIG_DEBUG
void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
{
@@ -110,12 +112,13 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
pbi->common.current_video_frame = 0;
pbi->ready_for_new_data = 1;
- pbi->CPUFreq = 0; //vp8_get_processor_freq();
+ pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/
pbi->max_threads = oxcf->max_threads;
vp8_decoder_create_threads(pbi);
- //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
- // unnecessary calling of vp8cx_init_de_quantizer() for every frame.
+ /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
+ * unnecessary calling of vp8cx_init_de_quantizer() for every frame.
+ */
vp8cx_init_de_quantizer(pbi);
{
@@ -127,6 +130,9 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
cm->last_sharpness_level = cm->sharpness_level;
}
+#if CONFIG_ARM_ASM_DETOK
+ vp8_init_detokenizer(pbi);
+#endif
pbi->common.error.setjmp = 0;
return (VP8D_PTR) pbi;
}
@@ -142,6 +148,11 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr)
if (pbi->segmentation_map != 0)
vpx_free(pbi->segmentation_map);
#endif
+
+#if CONFIG_MULTITHREAD
+ if (pbi->b_multithreaded_rd)
+ vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
+#endif
vp8_decoder_remove_threads(pbi);
vp8_remove_common(&pbi->common);
vpx_free(pbi);
@@ -181,57 +192,143 @@ int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
{
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
VP8_COMMON *cm = &pbi->common;
+ int ref_fb_idx;
if (ref_frame_flag == VP8_LAST_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
+ ref_fb_idx = cm->lst_fb_idx;
else if (ref_frame_flag == VP8_GOLD_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
+ ref_fb_idx = cm->gld_fb_idx;
else if (ref_frame_flag == VP8_ALT_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
+ ref_fb_idx = cm->alt_fb_idx;
else
return -1;
+ vp8_yv12_copy_frame_ptr(&cm->yv12_fb[ref_fb_idx], sd);
+
return 0;
}
int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
{
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
VP8_COMMON *cm = &pbi->common;
+ int ref_fb_idx;
if (ref_frame_flag == VP8_LAST_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
-
+ ref_fb_idx = cm->lst_fb_idx;
else if (ref_frame_flag == VP8_GOLD_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
+ ref_fb_idx = cm->gld_fb_idx;
else if (ref_frame_flag == VP8_ALT_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
+ ref_fb_idx = cm->alt_fb_idx;
else
return -1;
+ vp8_yv12_copy_frame_ptr(sd, &cm->yv12_fb[ref_fb_idx]);
+
return 0;
}
-//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.
+/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/
#if HAVE_ARMV7
extern void vp8_push_neon(INT64 *store);
extern void vp8_pop_neon(INT64 *store);
-static INT64 dx_store_reg[8];
#endif
+
+/* Claim an unused frame buffer slot.
+ *
+ * Scans cm->fb_idx_ref_cnt[] for the first entry that is zero, sets that
+ * entry to 1 and returns its index.
+ *
+ * NOTE(review): when none of the NUM_YV12_BUFFERS entries is zero the scan
+ * stops at NUM_YV12_BUFFERS, so the store below and the returned index are
+ * one past the range that was scanned. Presumably the reference counting
+ * guarantees a free slot always exists -- confirm, or add a check.
+ */
+static int get_free_fb (VP8_COMMON *cm)
+{
+    int slot = 0;
+
+    while (slot < NUM_YV12_BUFFERS && cm->fb_idx_ref_cnt[slot] != 0)
+        slot++;
+
+    cm->fb_idx_ref_cnt[slot] = 1;
+    return slot;
+}
+
+/* Re-point one reference at a different frame buffer.
+ *
+ * buf     - per-buffer reference counts
+ * idx     - in/out: buffer index currently held by the reference
+ * new_idx - buffer index the reference holds afterwards
+ *
+ * The count of the buffer being released is decremented unless it is
+ * already zero, *idx is set to new_idx, and the count of the newly
+ * referenced buffer is incremented.
+ */
+static void ref_cnt_fb (int *buf, int *idx, int new_idx)
+{
+    int *released = &buf[*idx];
+
+    if (*released > 0)
+        --*released;
+
+    *idx = new_idx;
+    ++buf[new_idx];
+}
+
+/* If any buffer copy / swapping is signalled it should be done here.
+ *
+ * Re-points the last / golden / alt-ref buffer indices according to the
+ * copy_buffer_to_* and refresh_* flags in cm, keeping fb_idx_ref_cnt[] in
+ * step, selects cm->frame_to_show, and finally drops one count on
+ * new_fb_idx.
+ *
+ * Returns 0 on success, or -1 if copy_buffer_to_arf or copy_buffer_to_gf
+ * holds a value other than 0, 1 or 2. In that case the affected reference
+ * is still re-pointed (at buffer 0) and the rest of the function still
+ * runs.
+ */
+static int swap_frame_buffers (VP8_COMMON *cm)
+{
+    int err = 0;
+
+    /* The alternate reference frame or golden frame can be updated by
+     * copying from the last frame or from the other of the two; this is
+     * distinct from a refresh, which takes the newly decoded frame.
+     *
+     * lst_fb_idx is only re-pointed further down, so at this point it
+     * still names the previous last frame. That is the source of a
+     * "copy from last" whether or not this frame also refreshes last;
+     * the newly decoded frame must never be used for a copy.
+     */
+    if (cm->copy_buffer_to_arf)
+    {
+        int new_fb = 0;
+
+        if (cm->copy_buffer_to_arf == 1)
+            new_fb = cm->lst_fb_idx;
+        else if (cm->copy_buffer_to_arf == 2)
+            new_fb = cm->gld_fb_idx;
+        else
+            err = -1;
+
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb);
+    }
+
+    if (cm->copy_buffer_to_gf)
+    {
+        int new_fb = 0;
+
+        if (cm->copy_buffer_to_gf == 1)
+            new_fb = cm->lst_fb_idx;
+        else if (cm->copy_buffer_to_gf == 2)
+            new_fb = cm->alt_fb_idx;
+        else
+            err = -1;
+
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb);
+    }
+
+    /* Refreshes take the newly decoded frame. */
+    if (cm->refresh_golden_frame)
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx);
+
+    if (cm->refresh_alt_ref_frame)
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx);
+
+    if (cm->refresh_last_frame)
+    {
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx);
+
+        cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx];
+    }
+    else
+        cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
+
+    /* Drop one count on the new buffer now that the references above
+     * hold their own.
+     */
+    cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+
+    return err;
+}
+
int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
{
+#if HAVE_ARMV7
+ INT64 dx_store_reg[8];
+#endif
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
VP8_COMMON *cm = &pbi->common;
int retcode = 0;
-
struct vpx_usec_timer timer;
-// if(pbi->ready_for_new_data == 0)
-// return -1;
+ /*if(pbi->ready_for_new_data == 0)
+ return -1;*/
if (ptr == 0)
{
@@ -240,21 +337,38 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
pbi->common.error.error_code = VPX_CODEC_OK;
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_push_neon(dx_store_reg);
+ }
+#endif
+
+ cm->new_fb_idx = get_free_fb (cm);
+
if (setjmp(pbi->common.error.jmp))
{
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
+#endif
pbi->common.error.setjmp = 0;
+ if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
return -1;
}
pbi->common.error.setjmp = 1;
-#if HAVE_ARMV7
- vp8_push_neon(dx_store_reg);
-#endif
-
vpx_usec_timer_start(&timer);
- //cm->current_video_frame++;
+ /*cm->current_video_frame++;*/
pbi->Source = source;
pbi->source_sz = size;
@@ -263,103 +377,80 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
if (retcode < 0)
{
#if HAVE_ARMV7
- vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
#endif
pbi->common.error.error_code = VPX_CODEC_ERROR;
pbi->common.error.setjmp = 0;
+ if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
return retcode;
}
- // Update the GF useage maps.
- vp8_update_gf_useage_maps(cm, &pbi->mb);
-
- if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
- vp8_stop_lfthread(pbi);
-
- if (cm->refresh_last_frame)
+ if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION)
{
- vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-
- cm->frame_to_show = &cm->last_frame;
- }
- else
- {
- cm->frame_to_show = &cm->new_frame;
- }
-
- if (!pbi->b_multithreaded_lf)
+ if (swap_frame_buffers (cm))
+ {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
+#endif
+ pbi->common.error.error_code = VPX_CODEC_ERROR;
+ pbi->common.error.setjmp = 0;
+ return -1;
+ }
+ } else
{
- struct vpx_usec_timer lpftimer;
- vpx_usec_timer_start(&lpftimer);
- // Apply the loop filter if appropriate.
+ if (swap_frame_buffers (cm))
+ {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
+#endif
+ pbi->common.error.error_code = VPX_CODEC_ERROR;
+ pbi->common.error.setjmp = 0;
+ return -1;
+ }
- if (cm->filter_level > 0)
+ if(pbi->common.filter_level)
{
+ struct vpx_usec_timer lpftimer;
+ vpx_usec_timer_start(&lpftimer);
+ /* Apply the loop filter if appropriate. */
+
vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
+
+ vpx_usec_timer_mark(&lpftimer);
+ pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+
cm->last_frame_type = cm->frame_type;
cm->last_filter_type = cm->filter_type;
cm->last_sharpness_level = cm->sharpness_level;
-
}
-
- vpx_usec_timer_mark(&lpftimer);
- pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+ vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
}
- vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
-
-
- // DEBUG code
#if 0
+ /* DEBUG code */
+ /*vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);*/
vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
if (cm->current_video_frame <= 5)
write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
#endif
- // If any buffer copy / swaping is signalled it should be done here.
- if (cm->copy_buffer_to_arf)
- {
- if (cm->copy_buffer_to_arf == 1)
- {
- if (cm->refresh_last_frame)
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
- else
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
- }
- else if (cm->copy_buffer_to_arf == 2)
- vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
- }
-
- if (cm->copy_buffer_to_gf)
- {
- if (cm->copy_buffer_to_gf == 1)
- {
- if (cm->refresh_last_frame)
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
- else
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
- }
- else if (cm->copy_buffer_to_gf == 2)
- vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
- }
-
- // Should the golden or alternate reference frame be refreshed?
- if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
- {
- if (cm->refresh_golden_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
-
- if (cm->refresh_alt_ref_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
-
- //vpx_log("Decoder: recovery frame received \n");
-
- // Update data structures that monitors GF useage
- vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
- cm->gf_active_count = cm->mb_rows * cm->mb_cols;
- }
-
vp8_clear_system_state();
vpx_usec_timer_mark(&timer);
@@ -367,7 +458,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
pbi->time_decoding += pbi->decode_microseconds;
-// vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);
+ /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/
if (cm->show_frame)
cm->current_video_frame++;
@@ -410,12 +501,17 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
#endif
#if HAVE_ARMV7
- vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
#endif
pbi->common.error.setjmp = 0;
return retcode;
}
-int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags)
+int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags)
{
int ret = -1;
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
@@ -423,7 +519,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
if (pbi->ready_for_new_data == 1)
return ret;
- // ie no raw frame to show!!!
+ /* ie no raw frame to show!!! */
if (pbi->common.show_frame == 0)
return ret;
@@ -433,7 +529,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
sd->clrtype = pbi->common.clr_type;
#if CONFIG_POSTPROC
- ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
+ ret = vp8_post_proc_frame(&pbi->common, sd, flags);
#else
if (pbi->common.frame_to_show)
@@ -449,7 +545,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
ret = -1;
}
-#endif //!CONFIG_POSTPROC
+#endif /*!CONFIG_POSTPROC*/
vp8_clear_system_state();
return ret;
}
diff --git a/vp8/decoder/onyxd_if_sjl.c b/vp8/decoder/onyxd_if_sjl.c
deleted file mode 100644
index 363ad5d72..000000000
--- a/vp8/decoder/onyxd_if_sjl.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "onyxc_int.h"
-#include "postproc.h"
-#include "onyxd.h"
-#include "onyxd_int.h"
-#include "vpx_mem/vpx_mem.h"
-#include "alloccommon.h"
-#include "vpx_scale/yv12extend.h"
-#include "loopfilter.h"
-#include "swapyv12buffer.h"
-#include "g_common.h"
-#include "threading.h"
-#include "decoderthreading.h"
-#include <stdio.h>
-#include "segmentation_common.h"
-#include "quant_common.h"
-#include "vpx_scale/vpxscale.h"
-#include "systemdependent.h"
-#include "vpx_ports/vpx_timer.h"
-
-
-#ifndef VPX_NO_GLOBALS
-static int init_ct = 0;
-#else
-# include "vpx_global_handling.h"
-# define init_ct ((int)vpxglobalm(onyxd,init_ct))
-#endif
-
-extern void vp8_init_loop_filter(VP8_COMMON *cm);
-
-extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
-extern void init_detokenizer(VP8D_COMP *dx);
-
-// DEBUG code
-void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
-{
- FILE *yuv_file = fopen((char *)name, "ab");
- unsigned char *src = s->y_buffer;
- int h = s->y_height;
-
- do
- {
- fwrite(src, s->y_width, 1, yuv_file);
- src += s->y_stride;
- }
- while (--h);
-
- src = s->u_buffer;
- h = s->uv_height;
-
- do
- {
- fwrite(src, s->uv_width, 1, yuv_file);
- src += s->uv_stride;
- }
- while (--h);
-
- src = s->v_buffer;
- h = s->uv_height;
-
- do
- {
- fwrite(src, s->uv_width, 1, yuv_file);
- src += s->uv_stride;
- }
- while (--h);
-
- fclose(yuv_file);
-}
-
-void vp8dx_initialize()
-{
- if (!init_ct++)
- {
- vp8_initialize_common();
- vp8_scale_machine_specific_config();
- }
-}
-
-void vp8dx_shutdown()
-{
- if (!--init_ct)
- {
- vp8_shutdown_common();
- }
-}
-
-
-VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
-{
- VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));
-
- if (!pbi)
- return NULL;
-
- vpx_memset(pbi, 0, sizeof(VP8D_COMP));
-
- vp8dx_initialize();
-
- vp8_create_common(&pbi->common);
- vp8_dmachine_specific_config(pbi);
-
- pbi->common.current_video_frame = 0;
- pbi->ready_for_new_data = 1;
-
- pbi->CPUFreq = 0; //vp8_get_processor_freq();
- pbi->max_threads = oxcf->max_threads;
- vp8_decoder_create_threads(pbi);
-
- //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
- // unnecessary calling of vp8cx_init_de_quantizer() for every frame.
- vp8cx_init_de_quantizer(pbi);
-
- {
- VP8_COMMON *cm = &pbi->common;
-
- vp8_init_loop_filter(cm);
- cm->last_frame_type = KEY_FRAME;
- cm->last_filter_type = cm->filter_type;
- cm->last_sharpness_level = cm->sharpness_level;
- }
-
- init_detokenizer(pbi);
-
- return (VP8D_PTR) pbi;
-}
-void vp8dx_remove_decompressor(VP8D_PTR ptr)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-
- if (!pbi)
- return;
-
- vp8_decoder_remove_threads(pbi);
- vp8_remove_common(&pbi->common);
- vpx_free(pbi);
- vp8dx_shutdown();
-
-}
-
-void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) comp;
-
- (void) pbi;
- (void) x;
-
- switch (oxst)
- {
- case VP8D_OK:
- break;
- }
-}
-
-int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) comp;
-
- (void) pbi;
-
- switch (oxst)
- {
- case VP8D_OK:
- break;
- }
-
- return -1;
-}
-
-int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) ptr;
- VP8_COMMON *cm = &pbi->common;
-
- if (ref_frame_flag == VP8_LAST_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
- else if (ref_frame_flag == VP8_GOLD_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
- else if (ref_frame_flag == VP8_ALT_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
- else
- return -1;
-
- return 0;
-}
-int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) ptr;
- VP8_COMMON *cm = &pbi->common;
-
- if (ref_frame_flag == VP8_LAST_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
-
- else if (ref_frame_flag == VP8_GOLD_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
- else if (ref_frame_flag == VP8_ALT_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
- else
- return -1;
-
- return 0;
-}
-int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, char *source, INT64 time_stamp)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) ptr;
- VP8_COMMON *cm = &pbi->common;
- int retcode = 0;
-
- struct vpx_usec_timer timer;
- (void) size;
-
-// if(pbi->ready_for_new_data == 0)
-// return -1;
-
- vpx_usec_timer_start(&timer);
-
- if (ptr == 0)
- {
- return -1;
- }
-
- //cm->current_video_frame++;
- pbi->Source = source;
-
- retcode = vp8_decode_frame(pbi);
-
- if (retcode < 0)
- return retcode;
-
- // Update the GF useage maps.
- vp8_update_gf_useage_maps(cm, &pbi->mb);
-
- if (pbi->b_multithreaded)
- vp8_stop_lfthread(pbi);
-
- if (cm->refresh_last_frame)
- {
- vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-
- cm->frame_to_show = &cm->last_frame;
- }
- else
- {
- cm->frame_to_show = &cm->new_frame;
- }
-
- if (!pbi->b_multithreaded)
- {
- struct vpx_usec_timer lpftimer;
- vpx_usec_timer_start(&lpftimer);
- // Apply the loop filter if appropriate.
-
- if (cm->filter_level > 0)
- {
- vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
- cm->last_frame_type = cm->frame_type;
- cm->last_filter_type = cm->filter_type;
- cm->last_sharpness_level = cm->sharpness_level;
-
- }
-
- vpx_usec_timer_mark(&lpftimer);
- pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
- }
-
- vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
-
-#if 0
- // DEBUG code
- //vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
- if (cm->current_video_frame <= 5)
- write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
-#endif
-
- // If any buffer copy / swaping is signalled it should be done here.
- if (cm->copy_buffer_to_arf)
- {
- if (cm->copy_buffer_to_arf == 1)
- {
- if (cm->refresh_last_frame)
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
- else
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
- }
- else if (cm->copy_buffer_to_arf == 2)
- vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
- }
-
- if (cm->copy_buffer_to_gf)
- {
- if (cm->copy_buffer_to_gf == 1)
- {
- if (cm->refresh_last_frame)
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
- else
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
- }
- else if (cm->copy_buffer_to_gf == 2)
- vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
- }
-
- // Should the golden or alternate reference frame be refreshed?
- if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
- {
- if (cm->refresh_golden_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
-
- if (cm->refresh_alt_ref_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
-
- //vpx_log("Decoder: recovery frame received \n");
-
- // Update data structures that monitors GF useage
- vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
- cm->gf_active_count = cm->mb_rows * cm->mb_cols;
- }
-
- vp8_clear_system_state();
-
- vpx_usec_timer_mark(&timer);
- pbi->decode_microseconds = vpx_usec_timer_elapsed(&timer);
-
- pbi->time_decoding += pbi->decode_microseconds;
-
-// vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);
-
- cm->current_video_frame++;
- pbi->ready_for_new_data = 0;
- pbi->last_time_stamp = time_stamp;
-
- {
- int i;
- INT64 earliest_time = pbi->dr[0].time_stamp;
- INT64 latest_time = pbi->dr[0].time_stamp;
- INT64 time_diff = 0;
- int bytes = 0;
-
- pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;;
- pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp;
-
- for (i = 0; i < 16; i++)
- {
-
- bytes += pbi->dr[i].size;
-
- if (pbi->dr[i].time_stamp < earliest_time)
- earliest_time = pbi->dr[i].time_stamp;
-
- if (pbi->dr[i].time_stamp > latest_time)
- latest_time = pbi->dr[i].time_stamp;
- }
-
- time_diff = latest_time - earliest_time;
-
- if (time_diff > 0)
- {
- pbi->common.bitrate = 80000.00 * bytes / time_diff ;
- pbi->common.framerate = 160000000.00 / time_diff ;
- }
-
- }
- return retcode;
-}
-int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags)
-{
- int ret = -1;
- VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-
- if (pbi->ready_for_new_data == 1)
- return ret;
-
- // ie no raw frame to show!!!
- if (pbi->common.show_frame == 0)
- return ret;
-
- pbi->ready_for_new_data = 1;
- *time_stamp = pbi->last_time_stamp;
- *time_end_stamp = 0;
-
- sd->clrtype = pbi->common.clr_type;
- ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
- vp8_clear_system_state();
- return ret;
-}
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index e8b5d409a..fc1811d7f 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -47,21 +48,20 @@ typedef struct
typedef struct
{
- int *scan;
- UINT8 *ptr_onyxblock2context_leftabove;
- vp8_tree_index *vp8_coef_tree_ptr; //onyx_coef_tree_ptr; ???
- TOKENEXTRABITS *teb_base_ptr;
+ int const *scan;
+ UINT8 const *ptr_block2leftabove;
+ vp8_tree_index const *vp8_coef_tree_ptr;
+ TOKENEXTRABITS const *teb_base_ptr;
unsigned char *norm_ptr;
-// UINT16 *ptr_onyx_coef_bands_x;
- UINT8 *ptr_onyx_coef_bands_x;
+ UINT8 *ptr_coef_bands_x;
- ENTROPY_CONTEXT **A;
- ENTROPY_CONTEXT(*L)[4];
+ ENTROPY_CONTEXT_PLANES *A;
+ ENTROPY_CONTEXT_PLANES *L;
INT16 *qcoeff_start_ptr;
BOOL_DECODER *current_bc;
- UINT8 *coef_probs[4];
+ vp8_prob const *coef_probs[4];
UINT8 eob[25];
@@ -88,27 +88,32 @@ typedef struct VP8Decompressor
unsigned int time_loop_filtering;
volatile int b_multithreaded_rd;
- volatile int b_multithreaded_lf;
int max_threads;
- int last_mb_row_decoded;
int current_mb_col_main;
int decoding_thread_count;
int allocated_decoding_thread_count;
- // variable for threading
- DECLARE_ALIGNED(16, MACROBLOCKD, lpfmb);
+ /* variable for threading */
#if CONFIG_MULTITHREAD
- pthread_t h_thread_lpf; // thread for postprocessing
- sem_t h_event_lpf; // Event for post_proc completed
- sem_t h_event_start_lpf;
-#endif
+ int mt_baseline_filter_level[MAX_MB_SEGMENTS];
+ int sync_range;
+ int *mt_current_mb_col; /* Each row remembers its already decoded column. */
+
+ unsigned char **mt_yabove_row; /* mb_rows x width */
+ unsigned char **mt_uabove_row;
+ unsigned char **mt_vabove_row;
+ unsigned char **mt_yleft_col; /* mb_rows x 16 */
+ unsigned char **mt_uleft_col; /* mb_rows x 8 */
+ unsigned char **mt_vleft_col; /* mb_rows x 8 */
+
MB_ROW_DEC *mb_row_di;
- DECODETHREAD_DATA *de_thread_data;
-#if CONFIG_MULTITHREAD
+ DECODETHREAD_DATA *de_thread_data;
+
pthread_t *h_decoding_thread;
- sem_t *h_event_mbrdecoding;
- sem_t h_event_main;
- // end of threading data
+ sem_t *h_event_start_decoding;
+ sem_t h_event_end_decoding;
+ /* end of threading data */
#endif
+
vp8_reader *mbc;
INT64 last_time_stamp;
int ready_for_new_data;
@@ -122,6 +127,12 @@ typedef struct VP8Decompressor
struct vp8_dboolhuff_rtcd_vtable dboolhuff;
#endif
+
+ vp8_prob prob_intra;
+ vp8_prob prob_last;
+ vp8_prob prob_gf;
+ vp8_prob prob_skip_false;
+
} VP8D_COMP;
int vp8_decode_frame(VP8D_COMP *cpi);
diff --git a/vp8/decoder/reconintra_mt.c b/vp8/decoder/reconintra_mt.c
new file mode 100644
index 000000000..ad4324b27
--- /dev/null
+++ b/vp8/decoder/reconintra_mt.c
@@ -0,0 +1,982 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "recon.h"
+#include "reconintra.h"
+#include "vpx_mem/vpx_mem.h"
+#include "onyxd_int.h"
+
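+/* The *_s variants below (vp8mt_build_intra_predictors_mby_s() and
+ * vp8mt_build_intra_predictors_mbuv_s()) were added for skip_recon_mb(): they
+ * write the prediction directly into the destination frame buffer (x->dst)
+ * instead of x->predictor.
+ */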
+
+/* Build the 16x16 luma intra predictor for one macroblock (multi-threaded
+ * decoder).
+ *
+ * The prediction selected by x->mode_info_context->mbmi.mode is written to
+ * x->predictor as 16 rows of 16 bytes.  B_PRED and the inter modes leave the
+ * predictor untouched.
+ *
+ * pbi     decoder instance; supplies common.filter_level and the
+ *         mt_yabove_row / mt_yleft_col neighbour buffers
+ * x       macroblock being predicted
+ * mb_row  macroblock row, indexes the per-row neighbour buffers
+ * mb_col  macroblock column, offsets into the above-row buffer
+ *
+ * Compiles to a no-op when CONFIG_MULTITHREAD is 0.
+ */
+void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    unsigned char *yabove_row;
+    unsigned char *yleft_col;
+    unsigned char yleft_buf[16];
+    unsigned char ytop_left;
+    unsigned char *ypred_ptr = x->predictor;
+    int r, c, i;
+
+    if (pbi->common.filter_level)
+    {
+        /* Loop filter enabled: take the neighbours from the per-row buffers.
+         * NOTE(review): the +32 presumably skips a left border in the saved
+         * row -- confirm against the buffer allocation code.
+         */
+        yabove_row = pbi->mt_yabove_row[mb_row] + mb_col * 16 + 32;
+        yleft_col = pbi->mt_yleft_col[mb_row];
+    }
+    else
+    {
+        /* No loop filter: read the neighbours straight from the frame buffer. */
+        yabove_row = x->dst.y_buffer - x->dst.y_stride;
+
+        for (i = 0; i < 16; i++)
+            yleft_buf[i] = x->dst.y_buffer[i * x->dst.y_stride - 1];
+
+        yleft_col = yleft_buf;
+    }
+
+    ytop_left = yabove_row[-1];
+
+    /* for Y */
+    switch (x->mode_info_context->mbmi.mode)
+    {
+    case DC_PRED:
+    {
+        int expected_dc;
+        int shift;
+        int average = 0;
+
+        if (x->up_available || x->left_available)
+        {
+            if (x->up_available)
+            {
+                for (i = 0; i < 16; i++)
+                    average += yabove_row[i];
+            }
+
+            if (x->left_available)
+            {
+                for (i = 0; i < 16; i++)
+                    average += yleft_col[i];
+            }
+
+            /* Rounded mean over 16 or 32 samples. */
+            shift = 3 + x->up_available + x->left_available;
+            expected_dc = (average + (1 << (shift - 1))) >> shift;
+        }
+        else
+        {
+            expected_dc = 128;
+        }
+
+        vpx_memset(ypred_ptr, expected_dc, 256);
+    }
+    break;
+    case V_PRED:
+    {
+        /* Copy the above row with vpx_memcpy instead of int* casts: the casts
+         * assumed a 4-byte int and suitably aligned pointers, and accessed
+         * unsigned char storage through an incompatible type.
+         */
+        for (r = 0; r < 16; r++)
+        {
+            vpx_memcpy(ypred_ptr, yabove_row, 16);
+            ypred_ptr += 16;
+        }
+    }
+    break;
+    case H_PRED:
+    {
+        for (r = 0; r < 16; r++)
+        {
+            vpx_memset(ypred_ptr, yleft_col[r], 16);
+            ypred_ptr += 16;
+        }
+    }
+    break;
+    case TM_PRED:
+    {
+        for (r = 0; r < 16; r++)
+        {
+            for (c = 0; c < 16; c++)
+            {
+                int pred = yleft_col[r] + yabove_row[c] - ytop_left;
+
+                if (pred < 0)
+                    pred = 0;
+
+                if (pred > 255)
+                    pred = 255;
+
+                ypred_ptr[c] = pred;
+            }
+
+            ypred_ptr += 16;
+        }
+    }
+    break;
+    case B_PRED:
+    case NEARESTMV:
+    case NEARMV:
+    case ZEROMV:
+    case NEWMV:
+    case SPLITMV:
+    case MB_MODE_COUNT:
+        break;
+    }
+#else
+    (void) pbi;
+    (void) x;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
+
+/* Build the 16x16 luma intra prediction for one macroblock directly in the
+ * destination frame buffer (multi-threaded decoder).
+ *
+ * Same prediction as vp8mt_build_intra_predictors_mby(), but the output goes
+ * to x->dst.y_buffer with a row pitch of x->dst.y_stride instead of to
+ * x->predictor.  B_PRED and the inter modes write nothing.
+ *
+ * pbi     decoder instance; supplies common.filter_level and the
+ *         mt_yabove_row / mt_yleft_col neighbour buffers
+ * x       macroblock being predicted
+ * mb_row  macroblock row, indexes the per-row neighbour buffers
+ * mb_col  macroblock column, offsets into the above-row buffer
+ *
+ * Compiles to a no-op when CONFIG_MULTITHREAD is 0.
+ */
+void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    unsigned char *yabove_row;
+    unsigned char *yleft_col;
+    unsigned char yleft_buf[16];
+    unsigned char ytop_left;
+    unsigned char *ypred_ptr = x->dst.y_buffer;
+    int y_stride = x->dst.y_stride;
+    int r, c, i;
+
+    if (pbi->common.filter_level)
+    {
+        /* Loop filter enabled: take the neighbours from the per-row buffers.
+         * NOTE(review): the +32 presumably skips a left border in the saved
+         * row -- confirm against the buffer allocation code.
+         */
+        yabove_row = pbi->mt_yabove_row[mb_row] + mb_col * 16 + 32;
+        yleft_col = pbi->mt_yleft_col[mb_row];
+    }
+    else
+    {
+        /* No loop filter: read the neighbours from the frame buffer.  The left
+         * column is copied out first because the output lands in that buffer.
+         */
+        yabove_row = x->dst.y_buffer - x->dst.y_stride;
+
+        for (i = 0; i < 16; i++)
+            yleft_buf[i] = x->dst.y_buffer[i * x->dst.y_stride - 1];
+
+        yleft_col = yleft_buf;
+    }
+
+    ytop_left = yabove_row[-1];
+
+    /* for Y */
+    switch (x->mode_info_context->mbmi.mode)
+    {
+    case DC_PRED:
+    {
+        int expected_dc;
+        int shift;
+        int average = 0;
+
+        if (x->up_available || x->left_available)
+        {
+            if (x->up_available)
+            {
+                for (i = 0; i < 16; i++)
+                    average += yabove_row[i];
+            }
+
+            if (x->left_available)
+            {
+                for (i = 0; i < 16; i++)
+                    average += yleft_col[i];
+            }
+
+            /* Rounded mean over 16 or 32 samples. */
+            shift = 3 + x->up_available + x->left_available;
+            expected_dc = (average + (1 << (shift - 1))) >> shift;
+        }
+        else
+        {
+            expected_dc = 128;
+        }
+
+        for (r = 0; r < 16; r++)
+        {
+            vpx_memset(ypred_ptr, expected_dc, 16);
+            ypred_ptr += y_stride;
+        }
+    }
+    break;
+    case V_PRED:
+    {
+        /* Copy the above row with vpx_memcpy instead of int* casts: the casts
+         * assumed a 4-byte int and suitably aligned pointers, and accessed
+         * unsigned char storage through an incompatible type.
+         */
+        for (r = 0; r < 16; r++)
+        {
+            vpx_memcpy(ypred_ptr, yabove_row, 16);
+            ypred_ptr += y_stride;
+        }
+    }
+    break;
+    case H_PRED:
+    {
+        for (r = 0; r < 16; r++)
+        {
+            vpx_memset(ypred_ptr, yleft_col[r], 16);
+            ypred_ptr += y_stride;
+        }
+    }
+    break;
+    case TM_PRED:
+    {
+        for (r = 0; r < 16; r++)
+        {
+            for (c = 0; c < 16; c++)
+            {
+                int pred = yleft_col[r] + yabove_row[c] - ytop_left;
+
+                if (pred < 0)
+                    pred = 0;
+
+                if (pred > 255)
+                    pred = 255;
+
+                ypred_ptr[c] = pred;
+            }
+
+            ypred_ptr += y_stride;
+        }
+    }
+    break;
+    case B_PRED:
+    case NEARESTMV:
+    case NEARMV:
+    case ZEROMV:
+    case NEWMV:
+    case SPLITMV:
+    case MB_MODE_COUNT:
+        break;
+    }
+#else
+    (void) pbi;
+    (void) x;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
+
+/* Multi-threaded decoder: form the 8x8 U and V intra predictions for one
+ * macroblock in x->predictor (U at offset 256, V at offset 320, 8 bytes per
+ * row), according to x->mode_info_context->mbmi.uv_mode.
+ *
+ * With the loop filter enabled the neighbouring pixels come from the
+ * pbi->mt_[uv]above_row / mt_[uv]left_col buffers for mb_row; otherwise they
+ * are read from the frame buffer around x->dst.  Modes other than DC_PRED,
+ * V_PRED, H_PRED and TM_PRED write nothing.
+ */
+void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    unsigned char u_left_tmp[8];
+    unsigned char v_left_tmp[8];
+    unsigned char *u_above;
+    unsigned char *v_above;
+    unsigned char *u_left;
+    unsigned char *v_left;
+    unsigned char u_corner;
+    unsigned char v_corner;
+    unsigned char *u_out = &x->predictor[256];
+    unsigned char *v_out = &x->predictor[320];
+    int row, col;
+
+    if (pbi->common.filter_level)
+    {
+        u_above = pbi->mt_uabove_row[mb_row] + mb_col * 8 + 16;
+        v_above = pbi->mt_vabove_row[mb_row] + mb_col * 8 + 16;
+        u_left = pbi->mt_uleft_col[mb_row];
+        v_left = pbi->mt_vleft_col[mb_row];
+    }
+    else
+    {
+        u_above = x->dst.u_buffer - x->dst.uv_stride;
+        v_above = x->dst.v_buffer - x->dst.uv_stride;
+
+        /* Gather the column to the left of each chroma block. */
+        for (row = 0; row < 8; row++)
+        {
+            u_left_tmp[row] = x->dst.u_buffer[row * x->dst.uv_stride - 1];
+            v_left_tmp[row] = x->dst.v_buffer[row * x->dst.uv_stride - 1];
+        }
+
+        u_left = u_left_tmp;
+        v_left = v_left_tmp;
+    }
+
+    u_corner = u_above[-1];
+    v_corner = v_above[-1];
+
+    switch (x->mode_info_context->mbmi.uv_mode)
+    {
+    case DC_PRED:
+    {
+        /* 128 is used when neither neighbour is available. */
+        int u_dc = 128;
+        int v_dc = 128;
+
+        if (x->up_available || x->left_available)
+        {
+            int u_sum = 0;
+            int v_sum = 0;
+            int shift;
+
+            if (x->up_available)
+            {
+                for (col = 0; col < 8; col++)
+                {
+                    u_sum += u_above[col];
+                    v_sum += v_above[col];
+                }
+            }
+
+            if (x->left_available)
+            {
+                for (row = 0; row < 8; row++)
+                {
+                    u_sum += u_left[row];
+                    v_sum += v_left[row];
+                }
+            }
+
+            /* Rounded mean over 8 or 16 samples. */
+            shift = 2 + x->up_available + x->left_available;
+            u_dc = (u_sum + (1 << (shift - 1))) >> shift;
+            v_dc = (v_sum + (1 << (shift - 1))) >> shift;
+        }
+
+        vpx_memset(u_out, u_dc, 64);
+        vpx_memset(v_out, v_dc, 64);
+        break;
+    }
+    case V_PRED:
+        /* Every row repeats the row above the block. */
+        for (row = 0; row < 8; row++, u_out += 8, v_out += 8)
+        {
+            vpx_memcpy(u_out, u_above, 8);
+            vpx_memcpy(v_out, v_above, 8);
+        }
+        break;
+    case H_PRED:
+        /* Every row is filled with the pixel to its left. */
+        for (row = 0; row < 8; row++, u_out += 8, v_out += 8)
+        {
+            vpx_memset(u_out, u_left[row], 8);
+            vpx_memset(v_out, v_left[row], 8);
+        }
+        break;
+    case TM_PRED:
+        for (row = 0; row < 8; row++, u_out += 8, v_out += 8)
+        {
+            for (col = 0; col < 8; col++)
+            {
+                int u_val = u_left[row] + u_above[col] - u_corner;
+                int v_val = v_left[row] + v_above[col] - v_corner;
+
+                /* Clamp to the 8-bit pixel range. */
+                u_out[col] = (u_val < 0) ? 0 : ((u_val > 255) ? 255 : u_val);
+                v_out[col] = (v_val < 0) ? 0 : ((v_val > 255) ? 255 : v_val);
+            }
+        }
+        break;
+    case B_PRED:
+    case NEARESTMV:
+    case NEARMV:
+    case ZEROMV:
+    case NEWMV:
+    case SPLITMV:
+    case MB_MODE_COUNT:
+        break;
+    }
+#else
+    (void) pbi;
+    (void) x;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
+
+/* Multi-threaded decoder: form the 8x8 U and V intra predictions for one
+ * macroblock directly in the destination frame buffer (x->dst.u_buffer and
+ * x->dst.v_buffer, row pitch x->dst.uv_stride), according to
+ * x->mode_info_context->mbmi.uv_mode.
+ *
+ * With the loop filter enabled the neighbouring pixels come from the
+ * pbi->mt_[uv]above_row / mt_[uv]left_col buffers for mb_row; otherwise they
+ * are read from the frame buffer around x->dst.  Modes other than DC_PRED,
+ * V_PRED, H_PRED and TM_PRED write nothing.
+ */
+void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    unsigned char u_left_tmp[8];
+    unsigned char v_left_tmp[8];
+    unsigned char *u_above;
+    unsigned char *v_above;
+    unsigned char *u_left;
+    unsigned char *v_left;
+    unsigned char u_corner;
+    unsigned char v_corner;
+    unsigned char *u_out = x->dst.u_buffer;
+    unsigned char *v_out = x->dst.v_buffer;
+    int uv_stride = x->dst.uv_stride;
+    int row, col;
+
+    if (pbi->common.filter_level)
+    {
+        u_above = pbi->mt_uabove_row[mb_row] + mb_col * 8 + 16;
+        v_above = pbi->mt_vabove_row[mb_row] + mb_col * 8 + 16;
+        u_left = pbi->mt_uleft_col[mb_row];
+        v_left = pbi->mt_vleft_col[mb_row];
+    }
+    else
+    {
+        u_above = x->dst.u_buffer - x->dst.uv_stride;
+        v_above = x->dst.v_buffer - x->dst.uv_stride;
+
+        /* Gather the column to the left of each chroma block. */
+        for (row = 0; row < 8; row++)
+        {
+            u_left_tmp[row] = x->dst.u_buffer[row * x->dst.uv_stride - 1];
+            v_left_tmp[row] = x->dst.v_buffer[row * x->dst.uv_stride - 1];
+        }
+
+        u_left = u_left_tmp;
+        v_left = v_left_tmp;
+    }
+
+    u_corner = u_above[-1];
+    v_corner = v_above[-1];
+
+    switch (x->mode_info_context->mbmi.uv_mode)
+    {
+    case DC_PRED:
+    {
+        /* 128 is used when neither neighbour is available. */
+        int u_dc = 128;
+        int v_dc = 128;
+
+        if (x->up_available || x->left_available)
+        {
+            int u_sum = 0;
+            int v_sum = 0;
+            int shift;
+
+            if (x->up_available)
+            {
+                for (col = 0; col < 8; col++)
+                {
+                    u_sum += u_above[col];
+                    v_sum += v_above[col];
+                }
+            }
+
+            if (x->left_available)
+            {
+                for (row = 0; row < 8; row++)
+                {
+                    u_sum += u_left[row];
+                    v_sum += v_left[row];
+                }
+            }
+
+            /* Rounded mean over 8 or 16 samples. */
+            shift = 2 + x->up_available + x->left_available;
+            u_dc = (u_sum + (1 << (shift - 1))) >> shift;
+            v_dc = (v_sum + (1 << (shift - 1))) >> shift;
+        }
+
+        for (row = 0; row < 8; row++, u_out += uv_stride, v_out += uv_stride)
+        {
+            vpx_memset(u_out, u_dc, 8);
+            vpx_memset(v_out, v_dc, 8);
+        }
+        break;
+    }
+    case V_PRED:
+        /* Every row repeats the row above the block. */
+        for (row = 0; row < 8; row++, u_out += uv_stride, v_out += uv_stride)
+        {
+            vpx_memcpy(u_out, u_above, 8);
+            vpx_memcpy(v_out, v_above, 8);
+        }
+        break;
+    case H_PRED:
+        /* Every row is filled with the pixel to its left. */
+        for (row = 0; row < 8; row++, u_out += uv_stride, v_out += uv_stride)
+        {
+            vpx_memset(u_out, u_left[row], 8);
+            vpx_memset(v_out, v_left[row], 8);
+        }
+        break;
+    case TM_PRED:
+        for (row = 0; row < 8; row++, u_out += uv_stride, v_out += uv_stride)
+        {
+            for (col = 0; col < 8; col++)
+            {
+                int u_val = u_left[row] + u_above[col] - u_corner;
+                int v_val = v_left[row] + v_above[col] - v_corner;
+
+                /* Clamp to the 8-bit pixel range. */
+                u_out[col] = (u_val < 0) ? 0 : ((u_val > 255) ? 255 : u_val);
+                v_out[col] = (v_val < 0) ? 0 : ((v_val > 255) ? 255 : v_val);
+            }
+        }
+        break;
+    case B_PRED:
+    case NEARESTMV:
+    case NEARMV:
+    case ZEROMV:
+    case NEWMV:
+    case SPLITMV:
+    case MB_MODE_COUNT:
+        break;
+    }
+#else
+    (void) pbi;
+    (void) x;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
+
+
+/* Build the 4x4 intra prediction for sub-block `num` of a macroblock
+ * (multi-threaded decoder).
+ *
+ * pbi        decoder instance; supplies common.filter_level and the
+ *            mt_yabove_row / mt_yleft_col neighbour buffers
+ * xd         macroblock; xd->block[num] locates the sub-block in the frame
+ * b_mode     4x4 prediction mode (one of the B_*_PRED cases handled below)
+ * predictor  output; 4 rows of 4 bytes are written, rows 16 bytes apart
+ * mb_row     macroblock row, indexes the per-row neighbour buffers
+ * mb_col     macroblock column, offsets into the above-row buffer
+ * num        sub-block index within the macroblock
+ *
+ * A b_mode that matches none of the cases leaves predictor untouched.
+ * Compiles to a no-op when CONFIG_MULTITHREAD is 0.
+ */
+void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
+ MACROBLOCKD *xd,
+ int b_mode,
+ unsigned char *predictor,
+ int mb_row,
+ int mb_col,
+ int num)
+{
+#if CONFIG_MULTITHREAD
+ int i, r, c;
+
+ unsigned char *Above; /* = *(x->base_dst) + x->dst - x->dst_stride; */
+ unsigned char Left[4];
+ unsigned char top_left; /* = Above[-1]; */
+
+ BLOCKD *x = &xd->block[num];
+
+ /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/
+ /* Blocks 0-3 with the loop filter on take their above row from the per-row
+ * buffer; every other case reads the frame buffer row above the block.
+ * NOTE(review): the +32 presumably skips a left border in the saved row --
+ * confirm against the buffer allocation code.
+ */
+ if (num < 4 && pbi->common.filter_level)
+ Above = pbi->mt_yabove_row[mb_row] + mb_col*16 + num*4 + 32;
+ else
+ Above = *(x->base_dst) + x->dst - x->dst_stride;
+
+ /* Blocks 0, 4, 8 and 12 with the loop filter on take their left column from
+ * the per-row buffer, indexed by num + i; otherwise the four pixels left of
+ * the block are read from the frame buffer.
+ * NOTE(review): indexing by num assumes block num starts at pixel row num
+ * for these four blocks (4x4 raster numbering) -- confirm.
+ */
+ if (num%4==0 && pbi->common.filter_level)
+ {
+ for (i=0; i<4; i++)
+ Left[i] = pbi->mt_yleft_col[mb_row][num + i];
+ }else
+ {
+ Left[0] = (*(x->base_dst))[x->dst - 1];
+ Left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride];
+ Left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride];
+ Left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride];
+ }
+
+ /* For blocks 4, 8 and 12 with the loop filter on, the top-left pixel is the
+ * left-column entry just before the block's first row; otherwise it is the
+ * pixel preceding the above row.
+ */
+ if ((num==4 || num==8 || num==12) && pbi->common.filter_level)
+ top_left = pbi->mt_yleft_col[mb_row][num-1];
+ else
+ top_left = Above[-1];
+
+ switch (b_mode)
+ {
+ case B_DC_PRED:
+ {
+ /* Rounded mean of the 4 above and 4 left pixels. */
+ int expected_dc = 0;
+
+ for (i = 0; i < 4; i++)
+ {
+ expected_dc += Above[i];
+ expected_dc += Left[i];
+ }
+
+ expected_dc = (expected_dc + 4) >> 3;
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ predictor[c] = expected_dc;
+ }
+
+ predictor += 16;
+ }
+ }
+ break;
+ case B_TM_PRED:
+ {
+ /* prediction similar to true_motion prediction */
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ int pred = Above[c] - top_left + Left[r];
+
+ if (pred < 0)
+ pred = 0;
+
+ if (pred > 255)
+ pred = 255;
+
+ predictor[c] = pred;
+ }
+
+ predictor += 16;
+ }
+ }
+ break;
+
+ case B_VE_PRED:
+ {
+
+ /* 3-tap (1,2,1) smoothing of the above row; reads Above[4]. */
+ unsigned int ap[4];
+ ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2;
+ ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2;
+ ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2;
+ ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2;
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+
+ predictor[c] = ap[c];
+ }
+
+ predictor += 16;
+ }
+
+ }
+ break;
+
+
+ case B_HE_PRED:
+ {
+
+ /* 3-tap (1,2,1) smoothing of the left column; Left[3] is repeated at the
+ * bottom edge.
+ */
+ unsigned int lp[4];
+ lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2;
+ lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2;
+ lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2;
+ lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2;
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ predictor[c] = lp[r];
+ }
+
+ predictor += 16;
+ }
+ }
+ break;
+ case B_LD_PRED:
+ {
+ /* Reads Above[0..7], i.e. the 4 above and 4 above-right pixels. */
+ unsigned char *ptr = Above;
+ predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2;
+ predictor[0 * 16 + 1] =
+ predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2;
+ predictor[0 * 16 + 2] =
+ predictor[1 * 16 + 1] =
+ predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2;
+ predictor[0 * 16 + 3] =
+ predictor[1 * 16 + 2] =
+ predictor[2 * 16 + 1] =
+ predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2;
+ predictor[1 * 16 + 3] =
+ predictor[2 * 16 + 2] =
+ predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2;
+ predictor[2 * 16 + 3] =
+ predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2;
+ predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2;
+
+ }
+ break;
+ case B_RD_PRED:
+ {
+
+ /* pp[] is the edge laid out bottom-left to top-right:
+ * Left[3..0], top_left, Above[0..3].
+ */
+ unsigned char pp[9];
+
+ pp[0] = Left[3];
+ pp[1] = Left[2];
+ pp[2] = Left[1];
+ pp[3] = Left[0];
+ pp[4] = top_left;
+ pp[5] = Above[0];
+ pp[6] = Above[1];
+ pp[7] = Above[2];
+ pp[8] = Above[3];
+
+ predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[3 * 16 + 1] =
+ predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[3 * 16 + 2] =
+ predictor[2 * 16 + 1] =
+ predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[3 * 16 + 3] =
+ predictor[2 * 16 + 2] =
+ predictor[1 * 16 + 1] =
+ predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[2 * 16 + 3] =
+ predictor[1 * 16 + 2] =
+ predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[1 * 16 + 3] =
+ predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
+
+ }
+ break;
+ case B_VR_PRED:
+ {
+
+ /* Same edge layout as B_RD_PRED; pp[0] (Left[3]) is not used here. */
+ unsigned char pp[9];
+
+ pp[0] = Left[3];
+ pp[1] = Left[2];
+ pp[2] = Left[1];
+ pp[3] = Left[0];
+ pp[4] = top_left;
+ pp[5] = Above[0];
+ pp[6] = Above[1];
+ pp[7] = Above[2];
+ pp[8] = Above[3];
+
+
+ predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[3 * 16 + 1] =
+ predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[2 * 16 + 1] =
+ predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1;
+ predictor[3 * 16 + 2] =
+ predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[2 * 16 + 2] =
+ predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1;
+ predictor[3 * 16 + 3] =
+ predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ predictor[2 * 16 + 3] =
+ predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1;
+ predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
+ predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1;
+
+ }
+ break;
+ case B_VL_PRED:
+ {
+
+ /* Reads Above[0..7], i.e. the 4 above and 4 above-right pixels. */
+ unsigned char *pp = Above;
+
+ predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+ predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[2 * 16 + 0] =
+ predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[1 * 16 + 1] =
+ predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 1] =
+ predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[3 * 16 + 1] =
+ predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[0 * 16 + 3] =
+ predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1;
+ predictor[1 * 16 + 3] =
+ predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ }
+ break;
+
+ case B_HD_PRED:
+ {
+ /* Same edge layout as B_RD_PRED; pp[8] (Above[3]) is not used here. */
+ unsigned char pp[9];
+ pp[0] = Left[3];
+ pp[1] = Left[2];
+ pp[2] = Left[1];
+ pp[3] = Left[0];
+ pp[4] = top_left;
+ pp[5] = Above[0];
+ pp[6] = Above[1];
+ pp[7] = Above[2];
+ pp[8] = Above[3];
+
+
+ predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+ predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[2 * 16 + 0] =
+ predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[2 * 16 + 1] =
+ predictor[3 * 16 + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 2] =
+ predictor[1 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[2 * 16 + 3] =
+ predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[1 * 16 + 2] =
+ predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1;
+ predictor[1 * 16 + 3] =
+ predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ }
+ break;
+
+
+ case B_HU_PRED:
+ {
+ /* Uses only the left column; positions past the last left pixel are
+ * filled with Left[3].
+ */
+ unsigned char *pp = Left;
+ predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+ predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[0 * 16 + 2] =
+ predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[0 * 16 + 3] =
+ predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[1 * 16 + 2] =
+ predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[1 * 16 + 3] =
+ predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 2] =
+ predictor[2 * 16 + 3] =
+ predictor[3 * 16 + 0] =
+ predictor[3 * 16 + 1] =
+ predictor[3 * 16 + 2] =
+ predictor[3 * 16 + 3] = pp[3];
+ }
+ break;
+
+
+ }
+#else
+ (void) pbi;
+ (void) xd;
+ (void) b_mode;
+ (void) predictor;
+ (void) mb_row;
+ (void) mb_col;
+ (void) num;
+#endif
+}
+
+/* Copy the 4 pixels above and to the right of the macroblock down to rows 3,
+ * 7 and 11 of the frame buffer, just right of the macroblock (column 16), so
+ * that the 4x4 prediction modes which read above-right pixels find them
+ * filled in.
+ *
+ * pbi     decoder instance; supplies common.filter_level and mt_yabove_row
+ * x       macroblock; x->block[0] locates its top-left pixel in the frame
+ * mb_row  macroblock row, indexes the per-row above buffer
+ * mb_col  macroblock column, offsets into the above-row buffer
+ *
+ * With the loop filter enabled the source pixels come from
+ * pbi->mt_yabove_row[mb_row]; otherwise they are read from the frame buffer
+ * row above the macroblock.  Compiles to a no-op when CONFIG_MULTITHREAD is 0.
+ */
+void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    unsigned char *above_right;
+    unsigned char *dst_right = *(x->block[0].base_dst) + x->block[0].dst + 16;
+    int dst_stride = x->block[0].dst_stride;
+
+    if (pbi->common.filter_level)
+        above_right = pbi->mt_yabove_row[mb_row] + mb_col * 16 + 32 + 16;
+    else
+        above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
+
+    /* Byte copies instead of unsigned int loads/stores through cast pointers:
+     * the casts assumed a 4-byte unsigned int and aligned addresses, and
+     * accessed unsigned char storage through an incompatible type.
+     */
+    vpx_memcpy(dst_right + 3 * dst_stride, above_right, 4);
+    vpx_memcpy(dst_right + 7 * dst_stride, above_right, 4);
+    vpx_memcpy(dst_right + 11 * dst_stride, above_right, 4);
+#else
+    (void) pbi;
+    (void) x;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
diff --git a/vp8/decoder/reconintra_mt.h b/vp8/decoder/reconintra_mt.h
new file mode 100644
index 000000000..d401295b2
--- /dev/null
+++ b/vp8/decoder/reconintra_mt.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef __INC_RECONINTRA_MT_H
+#define __INC_RECONINTRA_MT_H
+
+/* reconintra functions used in multi-threaded decoder */
+/* NOTE(review): these prototypes use VP8D_COMP and MACROBLOCKD but this header
+ * includes nothing itself -- presumably the includer must pull in the headers
+ * that declare them first; confirm.
+ */
+#if CONFIG_MULTITHREAD
+/* 16x16 luma prediction: into x->predictor, or (_s) directly into x->dst. */
+extern void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+extern void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+/* 8x8 chroma (U and V) prediction: into x->predictor, or (_s) directly into x->dst. */
+extern void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+extern void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+
+/* 4x4 prediction for sub-block num, written to predictor with rows 16 bytes apart. */
+extern void vp8mt_predict_intra4x4(VP8D_COMP *pbi, MACROBLOCKD *x, int b_mode, unsigned char *predictor, int mb_row, int mb_col, int num);
+/* Copies the 4 above-right pixels down beside rows 3, 7 and 11 of the macroblock. */
+extern void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+#endif
+
+#endif
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index e35d1757f..fc2fad516 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -1,16 +1,20 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WIN32
# include <unistd.h>
#endif
+#ifdef __APPLE__
+#include <mach/mach_init.h>
+#endif
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "threading.h"
@@ -18,20 +22,22 @@
#include "loopfilter.h"
#include "extend.h"
#include "vpx_ports/vpx_timer.h"
+#include "detokenize.h"
+#include "reconinter.h"
+#include "reconintra_mt.h"
-extern void vp8_decode_mb_row(VP8D_COMP *pbi,
- VP8_COMMON *pc,
- int mb_row,
- MACROBLOCKD *xd);
-
+extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
+extern void clamp_mvs(MACROBLOCKD *xd);
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
-extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
+
+#if CONFIG_RUNTIME_CPU_DETECT
+#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
+#else
+#define RTCD_VTABLE(x) NULL
+#endif
void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
-
-
-
#if CONFIG_MULTITHREAD
VP8_COMMON *const pc = & pbi->common;
int i, j;
@@ -42,15 +48,11 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
#if CONFIG_RUNTIME_CPU_DETECT
mbd->rtcd = xd->rtcd;
#endif
-
-
mbd->subpixel_predict = xd->subpixel_predict;
mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
- mbd->gf_active_ptr = xd->gf_active_ptr;
- mbd->mode_info = pc->mi - 1;
mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
mbd->mode_info_stride = pc->mode_info_stride;
@@ -58,11 +60,8 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
mbd->frames_since_golden = pc->frames_since_golden;
mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;
- mbd->pre = pc->last_frame;
- mbd->dst = pc->new_frame;
-
-
-
+ mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
+ mbd->dst = pc->yv12_fb[pc->new_fb_idx];
vp8_setup_block_dptrs(mbd);
vp8_build_block_doffsets(mbd);
@@ -70,8 +69,14 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
- mbd->mbmi.mode = DC_PRED;
- mbd->mbmi.uv_mode = DC_PRED;
+ /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
+ vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
+ /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
+ vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
+ /*unsigned char mode_ref_lf_delta_enabled;
+ unsigned char mode_ref_lf_delta_update;*/
+ mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
+ mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update;
mbd->current_bc = &pbi->bc2;
@@ -81,6 +86,8 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
}
}
+ for (i=0; i< pc->mb_rows; i++)
+ pbi->mt_current_mb_col[i]=-1;
#else
(void) pbi;
(void) xd;
@@ -90,348 +97,390 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
}
-THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
+void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
{
#if CONFIG_MULTITHREAD
- int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
- VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
- MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
- ENTROPY_CONTEXT mb_row_left_context[4][4];
+ int eobtotal = 0;
+ int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
+ VP8_COMMON *pc = &pbi->common;
- while (1)
+ if (xd->mode_info_context->mbmi.mb_skip_coeff)
{
- if (pbi->b_multithreaded_rd == 0)
- break;
-
- //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0)
- if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0)
- {
- if (pbi->b_multithreaded_rd == 0)
- break;
- else
- {
- VP8_COMMON *pc = &pbi->common;
- int mb_row = mbrd->mb_row;
- MACROBLOCKD *xd = &mbrd->mbd;
-
- //printf("ithread:%d mb_row %d\n", ithread, mb_row);
- int i;
- int recon_yoffset, recon_uvoffset;
- int mb_col;
- int recon_y_stride = pc->last_frame.y_stride;
- int recon_uv_stride = pc->last_frame.uv_stride;
-
- volatile int *last_row_current_mb_col;
-
- if (ithread > 0)
- last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col;
- else
- last_row_current_mb_col = &pbi->current_mb_col_main;
-
- recon_yoffset = mb_row * recon_y_stride * 16;
- recon_uvoffset = mb_row * recon_uv_stride * 8;
- // reset above block coeffs
-
- xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
- xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
- xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
- xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
- xd->left_context = mb_row_left_context;
- vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
- xd->up_available = (mb_row != 0);
-
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
- xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-
- for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
- {
-
- while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1)
- {
- x86_pause_hint();
- thread_sleep(0);
- }
-
- // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
- vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
-
- if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
- {
- for (i = 0; i < 16; i++)
- {
- BLOCKD *d = &xd->block[i];
- vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
- }
- }
-
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-
- xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
- xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
- xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
-
- xd->left_available = (mb_col != 0);
-
- // Select the appropriate reference frame for this MB
- if (xd->mbmi.ref_frame == LAST_FRAME)
- {
- xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
- }
- else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
- {
- // Golden frame reconstruction buffer
- xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
- }
- else
- {
- // Alternate reference frame reconstruction buffer
- xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
- }
-
- vp8_build_uvmvs(xd, pc->full_pixel);
-
- vp8dx_bool_decoder_fill(xd->current_bc);
- vp8_decode_macroblock(pbi, xd);
-
+ vp8_reset_mb_tokens_context(xd);
+ }
+ else
+ {
+ eobtotal = vp8_decode_mb_tokens(pbi, xd);
+ }
- recon_yoffset += 16;
- recon_uvoffset += 8;
+ /* Perform temporary clamping of the MV to be used for prediction */
+ if (do_clamp)
+ {
+ clamp_mvs(xd);
+ }
- ++xd->mode_info_context; /* next mb */
+ xd->mode_info_context->mbmi.dc_diff = 1;
- xd->gf_active_ptr++; // GF useage flag for next MB
+ if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
+ {
+ xd->mode_info_context->mbmi.dc_diff = 0;
- xd->above_context[Y1CONTEXT] += 4;
- xd->above_context[UCONTEXT ] += 2;
- xd->above_context[VCONTEXT ] += 2;
- xd->above_context[Y2CONTEXT] ++;
- pbi->mb_row_di[ithread].current_mb_col = mb_col;
+ /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
+ if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+ {
+ vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
+ vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
+ }
+ else
+ {
+ vp8_build_inter_predictors_mb_s(xd);
+ }
+ return;
+ }
- }
+ if (xd->segmentation_enabled)
+ mb_init_dequantizer(pbi, xd);
- // adjust to the next row of mbs
- vp8_extend_mb_row(
- &pc->new_frame,
- xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
- );
+ /* do prediction */
+ if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+ {
+ vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
- ++xd->mode_info_context; /* skip prediction column */
+ if (xd->mode_info_context->mbmi.mode != B_PRED)
+ {
+ vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
+ } else {
+ vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
+ }
+ }
+ else
+ {
+ vp8_build_inter_predictors_mb(xd);
+ }
- // since we have multithread
- xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
+ /* dequantization and idct */
+ if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
+ {
+ BLOCKD *b = &xd->block[24];
+ DEQUANT_INVOKE(&pbi->dequant, block)(b);
- //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
- if ((mb_row & 1) == 1)
- {
- pbi->last_mb_row_decoded = mb_row;
- //printf("S%d", pbi->last_mb_row_decoded);
- }
+ /* do 2nd order transform on the dc block */
+ if (xd->eobs[24] > 1)
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
+ ((int *)b->qcoeff)[0] = 0;
+ ((int *)b->qcoeff)[1] = 0;
+ ((int *)b->qcoeff)[2] = 0;
+ ((int *)b->qcoeff)[3] = 0;
+ ((int *)b->qcoeff)[4] = 0;
+ ((int *)b->qcoeff)[5] = 0;
+ ((int *)b->qcoeff)[6] = 0;
+ ((int *)b->qcoeff)[7] = 0;
+ }
+ else
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
+ ((int *)b->qcoeff)[0] = 0;
+ }
- if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1)
- {
- //SetEvent(pbi->h_event_main);
- sem_post(&pbi->h_event_main);
+ DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
+ (xd->qcoeff, xd->block[0].dequant,
+ xd->predictor, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->eobs, xd->block[24].diff);
+ }
+ else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ BLOCKD *b = &xd->block[i];
+ vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);
- }
+ if (xd->eobs[i] > 1)
+ {
+ DEQUANT_INVOKE(&pbi->dequant, idct_add)
+ (b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
+ }
+ else
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+ (b->qcoeff[0] * b->dequant[0], b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
+ ((int *)b->qcoeff)[0] = 0;
}
}
}
+ else
+ {
+ DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+ (xd->qcoeff, xd->block[0].dequant,
+ xd->predictor, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->eobs);
+ }
+ DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+ (xd->qcoeff+16*16, xd->block[16].dequant,
+ xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.uv_stride, xd->eobs+16);
#else
- (void) p_data;
+ (void) pbi;
+ (void) xd;
+ (void) mb_row;
+ (void) mb_col;
#endif
-
- return 0 ;
}
-THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
+
+THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
{
#if CONFIG_MULTITHREAD
- VP8D_COMP *pbi = (VP8D_COMP *)p_data;
+ int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
+ VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
+ MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
+ ENTROPY_CONTEXT_PLANES mb_row_left_context;
while (1)
{
- if (pbi->b_multithreaded_lf == 0)
+ if (pbi->b_multithreaded_rd == 0)
break;
- //printf("before waiting for start_lpf\n");
-
- //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0)
- if (sem_wait(&pbi->h_event_start_lpf) == 0)
+ /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
+ if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
{
- if (pbi->b_multithreaded_lf == 0) // we're shutting down
+ if (pbi->b_multithreaded_rd == 0)
break;
else
{
+ VP8_COMMON *pc = &pbi->common;
+ MACROBLOCKD *xd = &mbrd->mbd;
- VP8_COMMON *cm = &pbi->common;
- MACROBLOCKD *mbd = &pbi->lpfmb;
- int default_filt_lvl = pbi->common.filter_level;
+ int mb_row;
+ int num_part = 1 << pbi->common.multi_token_partition;
+ volatile int *last_row_current_mb_col;
+ int nsync = pbi->sync_range;
- YV12_BUFFER_CONFIG *post = &cm->new_frame;
- loop_filter_info *lfi = cm->lf_info;
+ for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
+ {
+ int i;
+ int recon_yoffset, recon_uvoffset;
+ int mb_col;
+ int ref_fb_idx = pc->lst_fb_idx;
+ int dst_fb_idx = pc->new_fb_idx;
+ int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
- int mb_row;
- int mb_col;
+ int filter_level;
+ loop_filter_info *lfi = pc->lf_info;
+ int alt_flt_enabled = xd->segmentation_enabled;
+ int Segment;
+ pbi->mb_row_di[ithread].mb_row = mb_row;
+ pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row%num_part];
- int baseline_filter_level[MAX_MB_SEGMENTS];
- int filter_level;
- int alt_flt_enabled = mbd->segmentation_enabled;
+ last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
- int i;
- unsigned char *y_ptr, *u_ptr, *v_ptr;
+ recon_yoffset = mb_row * recon_y_stride * 16;
+ recon_uvoffset = mb_row * recon_uv_stride * 8;
+ /* reset above block coeffs */
- volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded;
+ xd->above_context = pc->above_context;
+ xd->left_context = &mb_row_left_context;
+ vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
+ xd->up_available = (mb_row != 0);
- //MODE_INFO * this_mb_mode_info = cm->mi;
- mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list
+ xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+ xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
- // Note the baseline filter values for each segment
- if (alt_flt_enabled)
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
+ for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
{
- if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
- baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- else
+ if ((mb_col & (nsync-1)) == 0)
{
- baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
+ while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
+ {
+ x86_pause_hint();
+ thread_sleep(0);
+ }
}
- }
- }
- else
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
- baseline_filter_level[i] = default_filt_lvl;
- }
- // Initialize the loop filter for this frame.
- vp8_init_loop_filter(cm);
+ if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ BLOCKD *d = &xd->block[i];
+ vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+ }
+ }
- // Set up the buffer pointers
- y_ptr = post->y_buffer;
- u_ptr = post->u_buffer;
- v_ptr = post->v_buffer;
+ if(pbi->common.filter_level)
+ {
+ /*update loopfilter info*/
+ Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+ filter_level = pbi->mt_baseline_filter_level[Segment];
+ /* Distance of Mb to the various image edges.
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ * Apply any context driven MB level adjustment
+ */
+ vp8_adjust_mb_lf_value(xd, &filter_level);
+ }
- // vp8_filter each macro block
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
- {
+ /* Distance of Mb to the various image edges.
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ */
+ xd->mb_to_left_edge = -((mb_col * 16) << 3);
+ xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
- while (mb_row >= *last_mb_row_decoded)
- {
- x86_pause_hint();
- thread_sleep(0);
- }
-
- //printf("R%d", mb_row);
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
- {
- int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
+ xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
- filter_level = baseline_filter_level[Segment];
+ xd->left_available = (mb_col != 0);
- // Apply any context driven MB level adjustment
- vp8_adjust_mb_lf_value(mbd, &filter_level);
-
- if (filter_level)
- {
- if (mb_col > 0)
- cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+ /* Select the appropriate reference frame for this MB */
+ if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ ref_fb_idx = pc->lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = pc->gld_fb_idx;
+ else
+ ref_fb_idx = pc->alt_fb_idx;
- if (mbd->mode_info_context->mbmi.dc_diff > 0)
- cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+ xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
- // don't apply across umv border
- if (mb_row > 0)
- cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+ vp8_build_uvmvs(xd, pc->full_pixel);
+ vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
- if (mbd->mode_info_context->mbmi.dc_diff > 0)
- cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+ if (pbi->common.filter_level)
+ {
+ if( mb_row != pc->mb_rows-1 )
+ {
+ /* Save decoded MB last row data for next-row decoding */
+ vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
+ vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
+ vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
+ }
+
+ /* save left_col for next MB decoding */
+ if(mb_col != pc->mb_cols-1)
+ {
+ MODE_INFO *next = xd->mode_info_context +1;
+
+ if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
+ {
+ for (i = 0; i < 16; i++)
+ pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
+ for (i = 0; i < 8; i++)
+ {
+ pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
+ pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
+ }
+ }
+ }
+
+ /* loopfilter on this macroblock. */
+ if (filter_level)
+ {
+ if (mb_col > 0)
+ pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ if (xd->mode_info_context->mbmi.dc_diff > 0)
+ pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ if (xd->mode_info_context->mbmi.dc_diff > 0)
+ pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+ }
}
- y_ptr += 16;
- u_ptr += 8;
- v_ptr += 8;
+ recon_yoffset += 16;
+ recon_uvoffset += 8;
+
+ ++xd->mode_info_context; /* next mb */
- mbd->mode_info_context++; // step to next MB
+ xd->above_context++;
+ /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
+ pbi->mt_current_mb_col[mb_row] = mb_col;
}
- y_ptr += post->y_stride * 16 - post->y_width;
- u_ptr += post->uv_stride * 8 - post->uv_width;
- v_ptr += post->uv_stride * 8 - post->uv_width;
+ /* adjust to the next row of mbs */
+ if (pbi->common.filter_level)
+ {
+ if(mb_row != pc->mb_rows-1)
+ {
+ int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
+ int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
+
+ for (i = 0; i < 4; i++)
+ {
+ pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
+ pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
+ pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
+ }
+ }
+ } else
+ vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
- mbd->mode_info_context++; // Skip border mb
- }
+ ++xd->mode_info_context; /* skip prediction column */
- //printf("R%d\n", mb_row);
- // When done, signal main thread that ME is finished
- //SetEvent(pbi->h_event_lpf);
- sem_post(&pbi->h_event_lpf);
+ /* since we have multithread */
+ xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
+ }
}
-
+ }
+ /* add this to each frame */
+ if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
+ {
+ /*SetEvent(pbi->h_event_end_decoding);*/
+ sem_post(&pbi->h_event_end_decoding);
}
}
-
#else
(void) p_data;
#endif
- return 0;
+
+ return 0 ;
}
+
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
int core_count = 0;
int ithread;
+ int i;
pbi->b_multithreaded_rd = 0;
- pbi->b_multithreaded_lf = 0;
pbi->allocated_decoding_thread_count = 0;
- core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads; //vp8_get_proc_core_count();
- if (core_count > 1)
- {
- sem_init(&pbi->h_event_lpf, 0, 0);
- sem_init(&pbi->h_event_start_lpf, 0, 0);
- pbi->b_multithreaded_lf = 1;
- pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi));
- }
+ core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
if (core_count > 1)
{
pbi->b_multithreaded_rd = 1;
- pbi->decoding_thread_count = core_count - 1;
+ pbi->decoding_thread_count = core_count -1;
CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
- CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
+ CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
{
- sem_init(&pbi->h_event_mbrdecoding[ithread], 0, 0);
+ sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
pbi->de_thread_data[ithread].ithread = ithread;
pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread];
pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
-
}
- sem_init(&pbi->h_event_main, 0, 0);
+ sem_init(&pbi->h_event_end_decoding, 0, 0);
+
pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
}
@@ -440,45 +489,196 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
#endif
}
-void vp8_decoder_remove_threads(VP8D_COMP *pbi)
+
+void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
{
#if CONFIG_MULTITHREAD
+ VP8_COMMON *const pc = & pbi->common;
+ int i;
- if (pbi->b_multithreaded_lf)
- {
- pbi->b_multithreaded_lf = 0;
- sem_post(&pbi->h_event_start_lpf);
- pthread_join(pbi->h_thread_lpf, 0);
- sem_destroy(&pbi->h_event_start_lpf);
- }
-
- //shutdown MB Decoding thread;
if (pbi->b_multithreaded_rd)
{
- pbi->b_multithreaded_rd = 0;
- // allow all threads to exit
+ if (pbi->mt_current_mb_col)
{
- int i;
+ vpx_free(pbi->mt_current_mb_col);
+ pbi->mt_current_mb_col = NULL ;
+ }
- for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+ /* Free above_row buffers. */
+ if (pbi->mt_yabove_row)
+ {
+ for (i=0; i< mb_rows; i++)
{
+ if (pbi->mt_yabove_row[i])
+ {
+ vpx_free(pbi->mt_yabove_row[i]);
+ pbi->mt_yabove_row[i] = NULL ;
+ }
+ }
+ vpx_free(pbi->mt_yabove_row);
+ pbi->mt_yabove_row = NULL ;
+ }
- sem_post(&pbi->h_event_mbrdecoding[i]);
- pthread_join(pbi->h_decoding_thread[i], NULL);
+ if (pbi->mt_uabove_row)
+ {
+ for (i=0; i< mb_rows; i++)
+ {
+ if (pbi->mt_uabove_row[i])
+ {
+ vpx_free(pbi->mt_uabove_row[i]);
+ pbi->mt_uabove_row[i] = NULL ;
+ }
}
+ vpx_free(pbi->mt_uabove_row);
+ pbi->mt_uabove_row = NULL ;
}
+
+ if (pbi->mt_vabove_row)
{
+ for (i=0; i< mb_rows; i++)
+ {
+ if (pbi->mt_vabove_row[i])
+ {
+ vpx_free(pbi->mt_vabove_row[i]);
+ pbi->mt_vabove_row[i] = NULL ;
+ }
+ }
+ vpx_free(pbi->mt_vabove_row);
+ pbi->mt_vabove_row = NULL ;
+ }
- int i;
- for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+ /* Free left_col buffers. */
+ if (pbi->mt_yleft_col)
+ {
+ for (i=0; i< mb_rows; i++)
+ {
+ if (pbi->mt_yleft_col[i])
+ {
+ vpx_free(pbi->mt_yleft_col[i]);
+ pbi->mt_yleft_col[i] = NULL ;
+ }
+ }
+ vpx_free(pbi->mt_yleft_col);
+ pbi->mt_yleft_col = NULL ;
+ }
+
+ if (pbi->mt_uleft_col)
+ {
+ for (i=0; i< mb_rows; i++)
{
- sem_destroy(&pbi->h_event_mbrdecoding[i]);
+ if (pbi->mt_uleft_col[i])
+ {
+ vpx_free(pbi->mt_uleft_col[i]);
+ pbi->mt_uleft_col[i] = NULL ;
+ }
}
+ vpx_free(pbi->mt_uleft_col);
+ pbi->mt_uleft_col = NULL ;
+ }
+
+ if (pbi->mt_vleft_col)
+ {
+ for (i=0; i< mb_rows; i++)
+ {
+ if (pbi->mt_vleft_col[i])
+ {
+ vpx_free(pbi->mt_vleft_col[i]);
+ pbi->mt_vleft_col[i] = NULL ;
+ }
+ }
+ vpx_free(pbi->mt_vleft_col);
+ pbi->mt_vleft_col = NULL ;
+ }
+ }
+#else
+ (void) pbi;
+#endif
+}
+
+int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
+{
+#if CONFIG_MULTITHREAD
+ VP8_COMMON *const pc = & pbi->common;
+ int i;
+ int uv_width;
+
+ if (pbi->b_multithreaded_rd)
+ {
+ vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
+
+ /* our internal buffers are always multiples of 16 */
+ if ((width & 0xf) != 0)
+ width += 16 - (width & 0xf);
+
+ if (width < 640) pbi->sync_range = 1;
+ else if (width <= 1280) pbi->sync_range = 8;
+ else if (width <= 2560) pbi->sync_range =16;
+ else pbi->sync_range = 32;
+
+ uv_width = width >>1;
+
+ /* Allocate an int for each mb row. */
+ CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
+
+ /* Allocate memory for above_row buffers. */
+ CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i=0; i< pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
+
+ CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i=0; i< pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
+
+ CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i=0; i< pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
+
+ /* Allocate memory for left_col buffers. */
+ CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i=0; i< pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
+
+ CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i=0; i< pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
+
+ CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i=0; i< pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
+ }
+ return 0;
+#else
+ (void) pbi;
+ (void) width;
+#endif
+}
+
+
+void vp8_decoder_remove_threads(VP8D_COMP *pbi)
+{
+#if CONFIG_MULTITHREAD
+ /* shutdown MB Decoding thread; */
+ if (pbi->b_multithreaded_rd)
+ {
+ int i;
+
+ pbi->b_multithreaded_rd = 0;
+
+ /* allow all threads to exit */
+ for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+ {
+ sem_post(&pbi->h_event_start_decoding[i]);
+ pthread_join(pbi->h_decoding_thread[i], NULL);
+ }
+
+ for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+ {
+ sem_destroy(&pbi->h_event_start_decoding[i]);
}
- sem_destroy(&pbi->h_event_main);
+ sem_destroy(&pbi->h_event_end_decoding);
if (pbi->h_decoding_thread)
{
@@ -486,10 +686,10 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
pbi->h_decoding_thread = NULL;
}
- if (pbi->h_event_mbrdecoding)
+ if (pbi->h_event_start_decoding)
{
- vpx_free(pbi->h_event_mbrdecoding);
- pbi->h_event_mbrdecoding = NULL;
+ vpx_free(pbi->h_event_start_decoding);
+ pbi->h_event_start_decoding = NULL;
}
if (pbi->mb_row_di)
@@ -504,43 +704,65 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
pbi->de_thread_data = NULL;
}
}
-
#else
(void) pbi;
#endif
}
-void vp8_start_lfthread(VP8D_COMP *pbi)
+void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
{
#if CONFIG_MULTITHREAD
- memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
- pbi->last_mb_row_decoded = 0;
- sem_post(&pbi->h_event_start_lpf);
-#else
- (void) pbi;
-#endif
-}
-
-void vp8_stop_lfthread(VP8D_COMP *pbi)
-{
-#if CONFIG_MULTITHREAD
- struct vpx_usec_timer timer;
-
- vpx_usec_timer_start(&timer);
-
- sem_wait(&pbi->h_event_lpf);
+ VP8_COMMON *cm = &pbi->common;
+ MACROBLOCKD *mbd = &pbi->mb;
+ /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/
+ loop_filter_info *lfi = cm->lf_info;
+ FRAME_TYPE frame_type = cm->frame_type;
+
+ /*int mb_row;
+ int mb_col;
+ int baseline_filter_level[MAX_MB_SEGMENTS];*/
+ int filter_level;
+ int alt_flt_enabled = mbd->segmentation_enabled;
+
+ int i;
+ /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
+
+ /* Note the baseline filter values for each segment */
+ if (alt_flt_enabled)
+ {
+ for (i = 0; i < MAX_MB_SEGMENTS; i++)
+ {
+ /* Abs value */
+ if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+ pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+ /* Delta Value */
+ else
+ {
+ pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+ pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
+ }
+ }
+ }
+ else
+ {
+ for (i = 0; i < MAX_MB_SEGMENTS; i++)
+ pbi->mt_baseline_filter_level[i] = default_filt_lvl;
+ }
- vpx_usec_timer_mark(&timer);
- pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
+ /* Initialize the loop filter for this frame. */
+ if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
+ vp8_init_loop_filter(cm);
+ else if (frame_type != cm->last_frame_type)
+ vp8_frame_init_loop_filter(lfi, frame_type);
#else
(void) pbi;
+ (void) default_filt_lvl;
#endif
}
-void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
- MACROBLOCKD *xd)
+void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
{
#if CONFIG_MULTITHREAD
int mb_row;
@@ -548,47 +770,212 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
int ibc = 0;
int num_part = 1 << pbi->common.multi_token_partition;
+ int i, j;
+ volatile int *last_row_current_mb_col = NULL;
+ int nsync = pbi->sync_range;
+
+ int filter_level;
+ loop_filter_info *lfi = pc->lf_info;
+ int alt_flt_enabled = xd->segmentation_enabled;
+ int Segment;
+
+ if(pbi->common.filter_level)
+ {
+ /* Set above_row buffer to 127 for decoding first MB row */
+ vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
+ vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
+ vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
+
+ for (i=1; i<pc->mb_rows; i++)
+ {
+ vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
+ vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
+ vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
+ }
+
+ /* Set left_col to 129 initially */
+ for (i=0; i<pc->mb_rows; i++)
+ {
+ vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
+ vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
+ vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
+ }
+ vp8mt_lpf_init(pbi, pc->filter_level);
+ }
vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
+ for (i = 0; i < pbi->decoding_thread_count; i++)
+ sem_post(&pbi->h_event_start_decoding[i]);
+
for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
{
int i;
- pbi->current_mb_col_main = -1;
-
- xd->current_bc = &pbi->mbc[ibc];
- ibc++ ;
- if (ibc == num_part)
- ibc = 0;
+ xd->current_bc = &pbi->mbc[mb_row%num_part];
- for (i = 0; i < pbi->decoding_thread_count; i++)
+ /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
{
- if ((mb_row + i + 1) >= pc->mb_rows)
- break;
+ int i;
+ int recon_yoffset, recon_uvoffset;
+ int mb_col;
+ int ref_fb_idx = pc->lst_fb_idx;
+ int dst_fb_idx = pc->new_fb_idx;
+ int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
- pbi->mb_row_di[i].mb_row = mb_row + i + 1;
- pbi->mb_row_di[i].mbd.current_bc = &pbi->mbc[ibc];
- ibc++;
+ /* volatile int *last_row_current_mb_col = NULL; */
+ if (mb_row > 0)
+ last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
- if (ibc == num_part)
- ibc = 0;
+ vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
+ recon_yoffset = mb_row * recon_y_stride * 16;
+ recon_uvoffset = mb_row * recon_uv_stride * 8;
+ /* reset above block coeffs */
- pbi->mb_row_di[i].current_mb_col = -1;
- sem_post(&pbi->h_event_mbrdecoding[i]);
- }
+ xd->above_context = pc->above_context;
+ xd->up_available = (mb_row != 0);
- vp8_decode_mb_row(pbi, pc, mb_row, xd);
+ xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+ xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
- xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
+ for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
+ {
+ if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){
+ while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
+ {
+ x86_pause_hint();
+ thread_sleep(0);
+ }
+ }
- if (mb_row < pc->mb_rows - 1)
- {
- sem_wait(&pbi->h_event_main);
+ if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ BLOCKD *d = &xd->block[i];
+ vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+ }
+ }
+
+ if(pbi->common.filter_level)
+ {
+ /* update loopfilter info */
+ Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+ filter_level = pbi->mt_baseline_filter_level[Segment];
+ /* Distance of Mb to the various image edges.
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ * Apply any context driven MB level adjustment
+ */
+ vp8_adjust_mb_lf_value(xd, &filter_level);
+ }
+
+ /* Distance of Mb to the various image edges.
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ */
+ xd->mb_to_left_edge = -((mb_col * 16) << 3);
+ xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+
+ xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+ xd->left_available = (mb_col != 0);
+
+ /* Select the appropriate reference frame for this MB */
+ if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ ref_fb_idx = pc->lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = pc->gld_fb_idx;
+ else
+ ref_fb_idx = pc->alt_fb_idx;
+
+ xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+ vp8_build_uvmvs(xd, pc->full_pixel);
+ vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
+
+ if (pbi->common.filter_level)
+ {
+ /* Save decoded MB last row data for next-row decoding */
+ if(mb_row != pc->mb_rows-1)
+ {
+ vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
+ vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
+ vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
+ }
+
+ /* save left_col for next MB decoding */
+ if(mb_col != pc->mb_cols-1)
+ {
+ MODE_INFO *next = xd->mode_info_context +1;
+
+ if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
+ {
+ for (i = 0; i < 16; i++)
+ pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
+ for (i = 0; i < 8; i++)
+ {
+ pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
+ pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
+ }
+ }
+ }
+
+ /* loopfilter on this macroblock. */
+ if (filter_level)
+ {
+ if (mb_col > 0)
+ pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ if (xd->mode_info_context->mbmi.dc_diff > 0)
+ pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ if (xd->mode_info_context->mbmi.dc_diff > 0)
+ pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+ }
+ }
+
+ recon_yoffset += 16;
+ recon_uvoffset += 8;
+
+ ++xd->mode_info_context; /* next mb */
+
+ xd->above_context++;
+
+ pbi->mt_current_mb_col[mb_row] = mb_col;
+ }
+
+ /* adjust to the next row of mbs */
+ if (pbi->common.filter_level)
+ {
+ if(mb_row != pc->mb_rows-1)
+ {
+ int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
+ int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
+
+ for (i = 0; i < 4; i++)
+ {
+ pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
+ pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
+ pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
+ }
+ }
+ }else
+ vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+
+ ++xd->mode_info_context; /* skip prediction column */
}
+ xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
}
- pbi->last_mb_row_decoded = mb_row;
+ sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */
#else
(void) pbi;
(void) xd;
diff --git a/vp8/decoder/treereader.h b/vp8/decoder/treereader.h
index eb10e2460..277842896 100644
--- a/vp8/decoder/treereader.h
+++ b/vp8/decoder/treereader.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index 02be4872e..0d6133a46 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -49,12 +50,12 @@ sym(vp8_dequantize_b_impl_mmx):
ret
-;void dequant_idct_mmx(short *input, short *dq, short *output, int pitch)
-global sym(vp8_dequant_idct_mmx)
-sym(vp8_dequant_idct_mmx):
+;void dequant_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride)
+global sym(vp8_dequant_idct_add_mmx)
+sym(vp8_dequant_idct_add_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 6
GET_GOT rbx
push rsi
push rdi
@@ -76,7 +77,8 @@ sym(vp8_dequant_idct_mmx):
movq mm3, [rax+24]
pmullw mm3, [rdx+24]
- mov rdx, arg(2) ;output
+ mov rdx, arg(3) ;dest
+ mov rsi, arg(2) ;pred
pxor mm7, mm7
@@ -87,7 +89,8 @@ sym(vp8_dequant_idct_mmx):
movq [rax+24],mm7
- movsxd rax, dword ptr arg(3) ;pitch
+ movsxd rax, dword ptr arg(4) ;pitch
+ movsxd rdi, dword ptr arg(5) ;stride
psubw mm0, mm2 ; b1= 0-2
paddw mm2, mm2 ;
@@ -95,11 +98,11 @@ sym(vp8_dequant_idct_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL];
+ pmulhw mm5, [GLOBAL(x_s1sqr2)];
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL];
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -107,10 +110,10 @@ sym(vp8_dequant_idct_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
@@ -150,11 +153,11 @@ sym(vp8_dequant_idct_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL];
+ pmulhw mm5, [GLOBAL(x_s1sqr2)];
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL];
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -162,16 +165,16 @@ sym(vp8_dequant_idct_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
- paddw mm0, [fours GLOBAL]
+ paddw mm0, [GLOBAL(fours)]
- paddw mm2, [fours GLOBAL]
+ paddw mm2, [GLOBAL(fours)]
movq mm6, mm2 ; a1
movq mm4, mm0 ; b1
@@ -206,13 +209,34 @@ sym(vp8_dequant_idct_mmx):
punpckldq mm2, mm4 ; 32 22 12 02
punpckhdq mm5, mm4 ; 33 23 13 03
- movq [rdx], mm0
+ pxor mm7, mm7
- movq [rdx+rax], mm1
- movq [rdx+rax*2], mm2
+ movd mm4, [rsi]
+ punpcklbw mm4, mm7
+ paddsw mm0, mm4
+ packuswb mm0, mm7
+ movd [rdx], mm0
- add rdx, rax
- movq [rdx+rax*2], mm5
+ movd mm4, [rsi+rax]
+ punpcklbw mm4, mm7
+ paddsw mm1, mm4
+ packuswb mm1, mm7
+ movd [rdx+rdi], mm1
+
+ movd mm4, [rsi+2*rax]
+ punpcklbw mm4, mm7
+ paddsw mm2, mm4
+ packuswb mm2, mm7
+ movd [rdx+rdi*2], mm2
+
+ add rdx, rdi
+ add rsi, rax
+
+ movd mm4, [rsi+2*rax]
+ punpcklbw mm4, mm7
+ paddsw mm5, mm4
+ packuswb mm5, mm7
+ movd [rdx+rdi*2], mm5
; begin epilog
pop rdi
@@ -223,12 +247,12 @@ sym(vp8_dequant_idct_mmx):
ret
-;void dequant_dc_idct_mmx(short *input, short *dq, short *output, int pitch, int Dc)
-global sym(vp8_dequant_dc_idct_mmx)
-sym(vp8_dequant_dc_idct_mmx):
+;void dequant_dc_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc)
+global sym(vp8_dequant_dc_idct_add_mmx)
+sym(vp8_dequant_dc_idct_add_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
+ SHADOW_ARGS_TO_STACK 7
GET_GOT rbx
push rsi
push rdi
@@ -237,8 +261,6 @@ sym(vp8_dequant_dc_idct_mmx):
mov rax, arg(0) ;input
mov rdx, arg(1) ;dq
- movsxd rcx, dword ptr arg(4) ;Dc
-
movq mm0, [rax ]
pmullw mm0, [rdx]
@@ -251,7 +273,8 @@ sym(vp8_dequant_dc_idct_mmx):
movq mm3, [rax+24]
pmullw mm3, [rdx+24]
- mov rdx, arg(2) ;output
+ mov rdx, arg(3) ;dest
+ mov rsi, arg(2) ;pred
pxor mm7, mm7
@@ -261,8 +284,15 @@ sym(vp8_dequant_dc_idct_mmx):
movq [rax+16],mm7
movq [rax+24],mm7
- pinsrw mm0, rcx, 0
- movsxd rax, dword ptr arg(3) ;pitch
+ ; move lower word of Dc to lower word of mm0
+ psrlq mm0, 16
+ movzx rcx, word ptr arg(6) ;Dc
+ psllq mm0, 16
+ movq mm7, rcx
+ por mm0, mm7
+
+ movsxd rax, dword ptr arg(4) ;pitch
+ movsxd rdi, dword ptr arg(5) ;stride
psubw mm0, mm2 ; b1= 0-2
paddw mm2, mm2 ;
@@ -270,11 +300,11 @@ sym(vp8_dequant_dc_idct_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL];
+ pmulhw mm5, [GLOBAL(x_s1sqr2)];
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL];
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -282,10 +312,10 @@ sym(vp8_dequant_dc_idct_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
@@ -325,11 +355,11 @@ sym(vp8_dequant_dc_idct_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL];
+ pmulhw mm5, [GLOBAL(x_s1sqr2)];
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL];
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -337,16 +367,16 @@ sym(vp8_dequant_dc_idct_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
- paddw mm0, [fours GLOBAL]
+ paddw mm0, [GLOBAL(fours)]
- paddw mm2, [fours GLOBAL]
+ paddw mm2, [GLOBAL(fours)]
movq mm6, mm2 ; a1
movq mm4, mm0 ; b1
@@ -381,13 +411,34 @@ sym(vp8_dequant_dc_idct_mmx):
punpckldq mm2, mm4 ; 32 22 12 02
punpckhdq mm5, mm4 ; 33 23 13 03
- movq [rdx], mm0
-
- movq [rdx+rax], mm1
- movq [rdx+rax*2], mm2
-
- add rdx, rax
- movq [rdx+rax*2], mm5
+ pxor mm7, mm7
+
+ movd mm4, [rsi]
+ punpcklbw mm4, mm7
+ paddsw mm0, mm4
+ packuswb mm0, mm7
+ movd [rdx], mm0
+
+ movd mm4, [rsi+rax]
+ punpcklbw mm4, mm7
+ paddsw mm1, mm4
+ packuswb mm1, mm7
+ movd [rdx+rdi], mm1
+
+ movd mm4, [rsi+2*rax]
+ punpcklbw mm4, mm7
+ paddsw mm2, mm4
+ packuswb mm2, mm7
+ movd [rdx+rdi*2], mm2
+
+ add rdx, rdi
+ add rsi, rax
+
+ movd mm4, [rsi+2*rax]
+ punpcklbw mm4, mm7
+ paddsw mm5, mm4
+ packuswb mm5, mm7
+ movd [rdx+rdi*2], mm5
; begin epilog
pop rdi
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index 5def406d3..dc68daab3 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -20,19 +21,48 @@
*/
#if HAVE_MMX
extern prototype_dequant_block(vp8_dequantize_b_mmx);
-extern prototype_dequant_idct(vp8_dequant_idct_mmx);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_mmx);
-
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_mmx);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_mmx);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_mmx);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_mmx
-#undef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_mmx
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
+
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_mmx
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_mmx
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_mmx
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_mmx
+
+#endif
+#endif
+
+#if HAVE_SSE2
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_sse2);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_sse2);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_sse2
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_sse2
-#undef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_mmx
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_sse2
#endif
#endif
diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/decoder/x86/idct_blk_mmx.c
new file mode 100644
index 000000000..78c91d3d2
--- /dev/null
+++ b/vp8/decoder/x86/idct_blk_mmx.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+/* Reconstruct the 16 blocks of a Y macroblock (4 rows of 4) whose DC values arrive separately in dc[]; pre is read with a pitch of 16, dst with the caller's stride. */
+void vp8_dequant_dc_idct_add_y_block_mmx
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs, short *dc)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) /* one pass per row of four blocks */
+ {
+ if (eobs[0] > 1) /* eob > 1: full dequant + IDCT + add, with dc[0] passed in as the DC term */
+ vp8_dequant_dc_idct_add_mmx (q, dq, pre, dst, 16, stride, dc[0]);
+ else /* eob <= 1: hand only the DC value to the DC-only routine */
+ vp8_dc_only_idct_add_mmx (dc[0], pre, dst, 16, stride);
+
+ if (eobs[1] > 1) /* second block of the row: next 16 coefficients, 4 bytes to the right */
+ vp8_dequant_dc_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+ else
+ vp8_dc_only_idct_add_mmx (dc[1], pre+4, dst+4, 16, stride);
+
+ if (eobs[2] > 1) /* third block of the row */
+ vp8_dequant_dc_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+ else
+ vp8_dc_only_idct_add_mmx (dc[2], pre+8, dst+8, 16, stride);
+
+ if (eobs[3] > 1) /* fourth block of the row */
+ vp8_dequant_dc_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+ else
+ vp8_dc_only_idct_add_mmx (dc[3], pre+12, dst+12, 16, stride);
+
+ q += 64; /* 4 blocks x 16 coefficients consumed */
+ dc += 4; /* one DC value per block */
+ pre += 64; /* 4 rows of the 16-byte-pitch prediction buffer */
+ dst += 4*stride; /* 4 rows down in the destination */
+ eobs += 4; /* one eob entry per block */
+ }
+}
+/* Reconstruct the 16 blocks of a Y macroblock (4 rows of 4) with the DC taken from the coefficient buffer itself; pre is read with a pitch of 16, dst with the caller's stride. */
+void vp8_dequant_idct_add_y_block_mmx
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) /* one pass per row of four blocks */
+ {
+ if (eobs[0] > 1) /* eob > 1: full dequant + IDCT + add */
+ vp8_dequant_idct_add_mmx (q, dq, pre, dst, 16, stride);
+ else /* eob <= 1: dequantize the DC here and use the DC-only routine */
+ {
+ vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dst, 16, stride);
+ ((int *)q)[0] = 0; /* zeroes q[0] and q[1] in one store (assumes int spans two shorts); NOTE(review): int* access to short data is a strict-aliasing hazard - confirm build flags */
+ }
+
+ if (eobs[1] > 1) /* second block of the row */
+ vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dst+4, 16, stride);
+ ((int *)(q+16))[0] = 0; /* same two-coefficient clear as above */
+ }
+
+ if (eobs[2] > 1) /* third block of the row */
+ vp8_dequant_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[32]*dq[0], pre+8, dst+8, 16, stride);
+ ((int *)(q+32))[0] = 0; /* same two-coefficient clear as above */
+ }
+
+ if (eobs[3] > 1) /* fourth block of the row */
+ vp8_dequant_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[48]*dq[0], pre+12, dst+12, 16, stride);
+ ((int *)(q+48))[0] = 0; /* same two-coefficient clear as above */
+ }
+
+ q += 64; /* 4 blocks x 16 coefficients consumed */
+ pre += 64; /* 4 rows of the 16-byte-pitch prediction buffer */
+ dst += 4*stride; /* 4 rows down in the destination */
+ eobs += 4; /* one eob entry per block */
+ }
+}
+/* Reconstruct the chroma blocks: the first loop writes four blocks (2 rows of 2) to dstu, the second writes the next four to dstv; q, pre and eobs run on continuously across both loops and pre uses a pitch of 8. */
+void vp8_dequant_idct_add_uv_block_mmx
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 2; i++) /* blocks written to dstu, one row of two per pass */
+ {
+ if (eobs[0] > 1) /* eob > 1: full dequant + IDCT + add */
+ vp8_dequant_idct_add_mmx (q, dq, pre, dstu, 8, stride);
+ else /* eob <= 1: dequantize the DC here and use the DC-only routine */
+ {
+ vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstu, 8, stride);
+ ((int *)q)[0] = 0; /* zeroes q[0] and q[1] in one store (assumes int spans two shorts) */
+ }
+
+ if (eobs[1] > 1) /* right-hand block of the row */
+ vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstu+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+ ((int *)(q+16))[0] = 0; /* same two-coefficient clear as above */
+ }
+
+ q += 32; /* 2 blocks x 16 coefficients consumed */
+ pre += 32; /* 4 rows of the 8-byte-pitch prediction buffer */
+ dstu += 4*stride; /* 4 rows down in the U destination */
+ eobs += 2; /* one eob entry per block */
+ }
+
+ for (i = 0; i < 2; i++) /* blocks written to dstv; q, pre and eobs continue from where the first loop stopped */
+ {
+ if (eobs[0] > 1) /* eob > 1: full dequant + IDCT + add */
+ vp8_dequant_idct_add_mmx (q, dq, pre, dstv, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstv, 8, stride);
+ ((int *)q)[0] = 0; /* same two-coefficient clear as in the first loop */
+ }
+
+ if (eobs[1] > 1) /* right-hand block of the row */
+ vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstv+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+ ((int *)(q+16))[0] = 0; /* same two-coefficient clear as above */
+ }
+
+ q += 32; /* 2 blocks x 16 coefficients consumed */
+ pre += 32; /* 4 rows of the 8-byte-pitch prediction buffer */
+ dstv += 4*stride; /* 4 rows down in the V destination */
+ eobs += 2; /* one eob entry per block */
+ }
+}
diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c
new file mode 100644
index 000000000..0273d6ed2
--- /dev/null
+++ b/vp8/decoder/x86/idct_blk_sse2.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+/* Helpers declared here but not defined in this file (presumably SSE2 assembly - confirm); the callers below invoke each once per pair of adjacent blocks. */
+void idct_dequant_dc_0_2x_sse2
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int dst_stride, short *dc);
+void idct_dequant_dc_full_2x_sse2
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int dst_stride, short *dc);
+
+void idct_dequant_0_2x_sse2
+ (short *q, short *dq ,unsigned char *pre,
+ unsigned char *dst, int dst_stride, int blk_stride);
+void idct_dequant_full_2x_sse2
+ (short *q, short *dq ,unsigned char *pre,
+ unsigned char *dst, int dst_stride, int blk_stride);
+/* SSE2 version of the Y-block reconstruction with separately supplied DC values: each helper call covers two adjacent blocks, so a row of four takes two calls. */
+void vp8_dequant_dc_idct_add_y_block_sse2
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs, short *dc)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) /* one pass per row of four blocks */
+ {
+ if (((short *)(eobs))[0] & 0xfefe) /* two eob bytes read as one short; the mask drops bit 0 of each, so this is non-zero when either eob is something other than 0 or 1. NOTE(review): short* access to char data assumes suitable alignment - confirm */
+ idct_dequant_dc_full_2x_sse2 (q, dq, pre, dst, stride, dc);
+ else /* both eobs are 0 or 1: use the "0" helper variant */
+ idct_dequant_dc_0_2x_sse2 (q, dq, pre, dst, stride, dc);
+
+ if (((short *)(eobs))[1] & 0xfefe) /* same test for the third and fourth blocks of the row */
+ idct_dequant_dc_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+ else
+ idct_dequant_dc_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+
+ q += 64; /* 4 blocks x 16 coefficients consumed */
+ dc += 4; /* one DC value per block */
+ pre += 64; /* advance the prediction pointer by 64 bytes */
+ dst += stride*4; /* 4 rows down in the destination */
+ eobs += 4; /* one eob entry per block */
+ }
+}
+/* SSE2 version of the Y-block reconstruction without a separate DC array: two helper calls per row of four blocks, each passed 16 as blk_stride. */
+void vp8_dequant_idct_add_y_block_sse2
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) /* one pass per row of four blocks */
+ {
+ if (((short *)(eobs))[0] & 0xfefe) /* non-zero when either of the first two eobs is something other than 0 or 1 (mask drops bit 0 of each byte) */
+ idct_dequant_full_2x_sse2 (q, dq, pre, dst, stride, 16);
+ else /* both eobs are 0 or 1: use the "0" helper variant */
+ idct_dequant_0_2x_sse2 (q, dq, pre, dst, stride, 16);
+
+ if (((short *)(eobs))[1] & 0xfefe) /* same test for the third and fourth blocks of the row */
+ idct_dequant_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+ else
+ idct_dequant_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+
+ q += 64; /* 4 blocks x 16 coefficients consumed */
+ pre += 64; /* advance the prediction pointer by 64 bytes */
+ dst += stride*4; /* 4 rows down in the destination */
+ eobs += 4; /* one eob entry per block */
+ }
+}
+/* SSE2 chroma reconstruction, unrolled into four two-block helper calls: eob pairs 0 and 1 write to dstu (upper then lower row), pairs 2 and 3 write to dstv; each call is passed 8 as blk_stride. */
+void vp8_dequant_idct_add_uv_block_sse2
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ if (((short *)(eobs))[0] & 0xfefe) /* first eob pair, written at dstu; non-zero when either eob is something other than 0 or 1 */
+ idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+ else
+ idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+
+ q += 32; /* 2 blocks x 16 coefficients consumed */
+ pre += 32; /* advance the prediction pointer by 32 bytes */
+ dstu += stride*4; /* 4 rows down in the U destination */
+
+ if (((short *)(eobs))[1] & 0xfefe) /* second eob pair, written 4 rows lower in dstu */
+ idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+ else
+ idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+
+ q += 32; /* the dstv coefficients follow the dstu ones in q */
+ pre += 32; /* likewise for the prediction data */
+
+ if (((short *)(eobs))[2] & 0xfefe) /* third eob pair, written at dstv */
+ idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+ else
+ idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+
+ q += 32; /* 2 blocks x 16 coefficients consumed */
+ pre += 32; /* advance the prediction pointer by 32 bytes */
+ dstv += stride*4; /* 4 rows down in the V destination */
+
+ if (((short *)(eobs))[3] & 0xfefe) /* fourth eob pair, written 4 rows lower in dstv */
+ idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+ else
+ idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+}
diff --git a/vp8/decoder/x86/onyxdxv.c b/vp8/decoder/x86/onyxdxv.c
index 75a676a07..50293c792 100644
--- a/vp8/decoder/x86/onyxdxv.c
+++ b/vp8/decoder/x86/onyxdxv.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 6d7cc3666..47e346dd9 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -38,14 +39,24 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
#if CONFIG_RUNTIME_CPU_DETECT
/* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
-
if (flags & HAS_MMX)
{
- pbi->dequant.block = vp8_dequantize_b_mmx;
- pbi->dequant.idct = vp8_dequant_idct_mmx;
- pbi->dequant.idct_dc = vp8_dequant_dc_idct_mmx;
+ pbi->dequant.block = vp8_dequantize_b_mmx;
+ pbi->dequant.idct_add = vp8_dequant_idct_add_mmx;
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_mmx;
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_mmx;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx;
+ }
+#endif
+#if HAVE_SSE2
+ if (flags & HAS_SSE2)
+ {
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_sse2;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
}
-
#endif
+
#endif
}
diff --git a/vp8/decoder/xprintf.c b/vp8/decoder/xprintf.c
deleted file mode 100644
index cb2221c15..000000000
--- a/vp8/decoder/xprintf.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-* Module Title : xprintf.cpp
-*
-* Description : Display a printf style message on the current video frame.
-*
-****************************************************************************/
-
-/****************************************************************************
-* Header Files
-****************************************************************************/
-
-#include <stdio.h>
-#include <stdarg.h>
-#ifdef _WIN32_WCE
-#include <windows.h>
-#endif
-#include "xprintf.h"
-
-/****************************************************************************
- *
- * ROUTINE : xprintf
- *
- * INPUTS : const PB_INSTANCE *ppbi : Pointer to decoder instance.
- * long n_pixel : Offset into buffer to write text.
- * const char *format : Format string for print.
- * ... : Variable length argument list.
- *
- * OUTPUTS : None.
- *
- * RETURNS : int: Size (in bytes) of the formatted text.
- *
- * FUNCTION : Display a printf style message on the current video frame.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-int onyx_xprintf(unsigned char *ppbuffer, long n_pixel, long n_size, long n_stride, const char *format, ...)
-{
- BOOL b_rc;
- va_list arglist;
- HFONT hfont, hfonto;
-
- int rc = 0;
- char sz_formatted[256] = "";
- unsigned char *p_dest = &ppbuffer[n_pixel];
-
-#ifdef _WIN32_WCE
- // Set up temporary bitmap
- HDC hdc_memory = NULL;
- HBITMAP hbm_temp = NULL;
- HBITMAP hbm_orig = NULL;
-
- RECT rect;
-
- // Copy bitmap to video frame
- long x;
- long y;
-
- // Format text
- va_start(arglist, format);
- _vsnprintf(sz_formatted, sizeof(sz_formatted), format, arglist);
- va_end(arglist);
-
- rect.left = 0;
- rect.top = 0;
- rect.right = 8 * strlen(sz_formatted);
- rect.bottom = 8;
-
- hdc_memory = create_compatible_dc(NULL);
-
- if (hdc_memory == NULL)
- goto Exit;
-
- hbm_temp = create_bitmap(rect.right, rect.bottom, 1, 1, NULL);
-
- if (hbm_temp == NULL)
- goto Exit;
-
- hbm_orig = (HBITMAP)(select_object(hdc_memory, hbm_temp));
-
- if (!hbm_orig)
- goto Exit;
-
- // Write text into bitmap
- // font?
- hfont = create_font(8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VARIABLE_PITCH | FF_SWISS, "");
-
- if (hfont == NULL)
- goto Exit;
-
- hfonto = (HFONT)(select_object(hdc_memory, hbm_temp));
-
- if (!hfonto)
- goto Exit;
-
- select_object(hdc_memory, hfont);
- set_text_color(hdc_memory, 1);
- set_bk_color(hdc_memory, 0);
- set_bk_mode(hdc_memory, TRANSPARENT);
-
- b_rc = bit_blt(hdc_memory, rect.left, rect.top, rect.right, rect.bottom, hdc_memory, rect.left, rect.top, BLACKNESS);
-
- if (!b_rc)
- goto Exit;
-
- b_rc = ext_text_out(hdc_memory, 0, 0, ETO_CLIPPED, &rect, sz_formatted, strlen(sz_formatted), NULL);
-
- if (!b_rc)
- goto Exit;
-
- for (y = rect.top; y < rect.bottom; ++y)
- {
- for (x = rect.left; x < rect.right; ++x)
- {
- if (get_pixel(hdc_memory, x, rect.bottom - 1 - y))
- p_dest[x] = 255;
- }
-
- p_dest += n_stride;
- }
-
- rc = strlen(sz_formatted);
-
-Exit:
-
- if (hbm_temp != NULL)
- {
- if (hbm_orig != NULL)
- {
- select_object(hdc_memory, hbm_orig);
- }
-
- delete_object(hbm_temp);
- }
-
- if (hfont != NULL)
- {
- if (hfonto != NULL)
- select_object(hdc_memory, hfonto);
-
- delete_object(hfont);
- }
-
- if (hdc_memory != NULL)
- delete_dc(hdc_memory);
-
- hdc_memory = 0;
-
-#endif
-
- return rc;
-}
diff --git a/vp8/decoder/xprintf.h b/vp8/decoder/xprintf.h
deleted file mode 100644
index 2f175e943..000000000
--- a/vp8/decoder/xprintf.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-* Module Title : xprintf.h
-*
-* Description : Debug print interface header file.
-*
-****************************************************************************/
-#ifndef __INC_XPRINTF_H
-#define __INC_XPRINTF_H
-
-/****************************************************************************
-* Header Files
-****************************************************************************/
-
-/****************************************************************************
-* Functions
-****************************************************************************/
-
-// Display a printf style message on the current video frame
-extern int onyx_xprintf(unsigned char *ppbuffer, long n_pixel, long n_size, long n_stride, const char *format, ...);
-
-#endif